From 49a0555f12776ae96c7c3cd7a52b7e323807b75f Mon Sep 17 00:00:00 2001 From: Johannes Aubart Date: Thu, 27 Nov 2025 15:51:53 +0100 Subject: [PATCH 1/5] fix copy-paste error in clusterrequest api --- api/clusters/v1alpha1/clusterrequest_types.go | 2 +- api/clusters/v1alpha1/zz_generated.deepcopy.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/clusters/v1alpha1/clusterrequest_types.go b/api/clusters/v1alpha1/clusterrequest_types.go index f3f1ebdf..9369c2e6 100644 --- a/api/clusters/v1alpha1/clusterrequest_types.go +++ b/api/clusters/v1alpha1/clusterrequest_types.go @@ -78,7 +78,7 @@ type ClusterRequest struct { type ClusterRequestList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` - Items []Cluster `json:"items"` + Items []ClusterRequest `json:"items"` } func init() { diff --git a/api/clusters/v1alpha1/zz_generated.deepcopy.go b/api/clusters/v1alpha1/zz_generated.deepcopy.go index 454ddbf3..2c204282 100644 --- a/api/clusters/v1alpha1/zz_generated.deepcopy.go +++ b/api/clusters/v1alpha1/zz_generated.deepcopy.go @@ -299,7 +299,7 @@ func (in *ClusterRequestList) DeepCopyInto(out *ClusterRequestList) { in.ListMeta.DeepCopyInto(&out.ListMeta) if in.Items != nil { in, out := &in.Items, &out.Items - *out = make([]Cluster, len(*in)) + *out = make([]ClusterRequest, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } From 91b889eb5e728e216b9a8ba82c27cbef7dc2931d Mon Sep 17 00:00:00 2001 From: Johannes Aubart Date: Thu, 27 Nov 2025 16:12:12 +0100 Subject: [PATCH 2/5] use deterministic hash instead of generateName --- api/clusters/v1alpha1/constants.go | 3 + api/clusters/v1alpha1/constants/reasons.go | 2 + internal/controllers/scheduler/controller.go | 129 ++++++++++++++++--- 3 files changed, 118 insertions(+), 16 deletions(-) diff --git a/api/clusters/v1alpha1/constants.go b/api/clusters/v1alpha1/constants.go index 11789c82..24381c62 100644 --- a/api/clusters/v1alpha1/constants.go +++ b/api/clusters/v1alpha1/constants.go @@ -66,6 +66,9 @@ const ( DeleteWithoutRequestsLabel = GroupName + "/delete-without-requests" // ProfileLabel is used to make the profile information easily accessible on AccessRequests. ProfileLabel = GroupName + "/profile" + // RandomizeClusterNameLabel can be set to "true" on ClusterRequests to have the corresponding Cluster get a randomized name. + // This is meant as a tool for operators to resolve conflicts, which itself should be rare. + RandomizeClusterNameLabel = GroupName + "/randomize-cluster-name" ) const ( diff --git a/api/clusters/v1alpha1/constants/reasons.go b/api/clusters/v1alpha1/constants/reasons.go index e47f2383..48e327cf 100644 --- a/api/clusters/v1alpha1/constants/reasons.go +++ b/api/clusters/v1alpha1/constants/reasons.go @@ -25,4 +25,6 @@ const ( ReasonWaitingForServices = "WaitingForServices" // ReasonWaitingForServiceDeletion indicates that something is waiting for a service to be deleted. ReasonWaitingForServiceDeletion = "WaitingForServiceDeletion" + // ReasonClusterConflict indicates that another cluster with the same name and namespace conflicts with the desired one. 
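+	// It is reported when the Cluster name computed for a ClusterRequest is already taken by a Cluster that belongs to a different, still existing ClusterRequest.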
+ ReasonClusterConflict = "ClusterConflict" ) diff --git a/internal/controllers/scheduler/controller.go b/internal/controllers/scheduler/controller.go index 6a627772..54e5bc55 100644 --- a/internal/controllers/scheduler/controller.go +++ b/internal/controllers/scheduler/controller.go @@ -17,6 +17,7 @@ import ( "github.com/openmcp-project/controller-utils/pkg/clusters" "github.com/openmcp-project/controller-utils/pkg/collections/filters" + maputils "github.com/openmcp-project/controller-utils/pkg/collections/maps" ctrlutils "github.com/openmcp-project/controller-utils/pkg/controller" errutils "github.com/openmcp-project/controller-utils/pkg/errors" "github.com/openmcp-project/controller-utils/pkg/logging" @@ -237,15 +238,82 @@ func (r *ClusterScheduler) handleCreateOrUpdate(ctx context.Context, req reconci } } } else { - cluster = r.initializeNewCluster(ctx, cr, cDef) + var err error + cluster, err = r.initializeNewCluster(ctx, cr, cDef) + if err != nil { + rr.ReconcileError = errutils.WithReason(fmt.Errorf("error initializing new cluster: %w", err), cconst.ReasonInternalError) + return rr + } - // create Cluster resource - if err := r.PlatformCluster.Client().Create(ctx, cluster); err != nil { - if apierrors.IsAlreadyExists(err) { - rr.ReconcileError = errutils.WithReason(fmt.Errorf("cluster '%s/%s' already exists, this is not supposed to happen", cluster.Namespace, cluster.Name), cconst.ReasonInternalError) - return rr + // check for conflicts + // A conflict occurs if + // - a cluster with the same name and namespace already exists + // - and it does not have a finalizer referencing this ClusterRequest + // - but it has a finalizer referencing another ClusterRequest + // - and this other ClusterRequest still exists + conflict := false + fin := cr.FinalizerForCluster() + existingCluster := &clustersv1alpha1.Cluster{} + existingCluster.Namespace = cluster.Namespace + existingCluster.Name = cluster.Name + finalizersToRemove := sets.New[string]() + if err := r.PlatformCluster.Client().Get(ctx, client.ObjectKeyFromObject(existingCluster), existingCluster); err == nil { + var crs *clustersv1alpha1.ClusterRequestList + for _, cfin := range existingCluster.Finalizers { + if cfin == fin { + conflict = false + break + } + // if we have not already found a conflict, check if the cluster contains a finalizer from another request which still exists + if !conflict && strings.HasPrefix(cfin, clustersv1alpha1.RequestFinalizerOnClusterPrefix) { + // check if the other request still exists + if crs == nil { + // fetch existing ClusterRequests if not done already + crs = &clustersv1alpha1.ClusterRequestList{} + if err := r.PlatformCluster.Client().List(ctx, crs); err != nil { + rr.ReconcileError = errutils.WithReason(fmt.Errorf("error listing ClusterRequests to check for conflicts: %w", err), cconst.ReasonPlatformClusterInteractionProblem) + return rr + } + found := false + for _, ecr := range crs.Items { + if cfin == ecr.FinalizerForCluster() { + conflict = true + break + } + } + if !found { + // the finalizer does not belong to any existing ClusterRequest, let's remove it + finalizersToRemove.Insert(cfin) + } + } + } } - rr.ReconcileError = errutils.WithReason(fmt.Errorf("error creating cluster '%s/%s': %w", cluster.Namespace, cluster.Name, err), cconst.ReasonPlatformClusterInteractionProblem) + } else if !apierrors.IsNotFound(err) { + rr.ReconcileError = errutils.WithReason(fmt.Errorf("error checking whether cluster '%s/%s' exists: %w", cluster.Namespace, cluster.Name, err), 
cconst.ReasonPlatformClusterInteractionProblem) + return rr + } + + if conflict { + rr.ReconcileError = errutils.WithReason(fmt.Errorf("cluster '%s/%s' already exists and is used by a different ClusterRequest (the '%s' label with value 'true' can be set on the ClusterRequest to randomize the cluster name and avoid this conflict)", cluster.Namespace, cluster.Name, clustersv1alpha1.RandomizeClusterNameLabel), cconst.ReasonClusterConflict) + return rr + } + + // create/update Cluster resource + // Note that clusters are usually not updated. This should only happen if the status of a ClusterRequest was lost and an existing cluster is recovered. + if _, err := controllerutil.CreateOrUpdate(ctx, r.PlatformCluster.Client(), existingCluster, func() error { + // merge finalizers, labels, and annotations from initialized cluster into existing one + finSet := sets.New(existingCluster.Finalizers...) + finSet.Delete(finalizersToRemove.UnsortedList()...) + finSet.Insert(cluster.Finalizers...) + existingCluster.Finalizers = sets.List(finSet) + existingCluster.Labels = maputils.Merge(existingCluster.Labels, cluster.Labels) + existingCluster.Annotations = maputils.Merge(existingCluster.Annotations, cluster.Annotations) + // copy spec from initialized cluster + existingCluster.Spec = cluster.Spec + + return nil + }); err != nil { + rr.ReconcileError = errutils.WithReason(fmt.Errorf("error creating/updating cluster '%s/%s': %w", cluster.Namespace, cluster.Name, err), cconst.ReasonPlatformClusterInteractionProblem) return rr } } @@ -377,23 +445,34 @@ func (r *ClusterScheduler) fetchRelevantClusters(ctx context.Context, cr *cluste } // initializeNewCluster creates a new Cluster resource based on the given ClusterRequest and ClusterDefinition. -func (r *ClusterScheduler) initializeNewCluster(ctx context.Context, cr *clustersv1alpha1.ClusterRequest, cDef *config.ClusterDefinition) *clustersv1alpha1.Cluster { +func (r *ClusterScheduler) initializeNewCluster(ctx context.Context, cr *clustersv1alpha1.ClusterRequest, cDef *config.ClusterDefinition) (*clustersv1alpha1.Cluster, error) { log := logging.FromContextOrPanic(ctx) purpose := cr.Spec.Purpose cluster := &clustersv1alpha1.Cluster{} // choose a name for the cluster // priority as follows: // - for singleton clusters (shared unlimited): - // 1. generateName of template + // 1. generateName of template (1) // 2. name of template // 3. purpose // - for exclusive clusters or shared limited: - // 1. generateName of template - // 2. purpose used as generateName + // 1. generateName of template (1) + // 2. purpose used as generateName (1) + // + // (1) Note that the kubernetes 'generateName' field will only be used if the ClusterRequest has the 'clusters.openmcp.cloud/randomize-cluster-name' label set to 'true'. + // Otherwise, a random-looking but deterministic name based on a hash of name and namespace of the ClusterRequest will be used. 
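+	// The deterministic name allows the scheduler to find the Cluster belonging to a ClusterRequest again, e.g. if the request's status was lost,
+	// at the cost of potential name conflicts (see the conflict check in handleCreateOrUpdate).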
if cDef.Template.Spec.Tenancy == clustersv1alpha1.TENANCY_SHARED && cDef.TenancyCount == 0 { // there will only be one instance of this cluster if cDef.Template.GenerateName != "" { - cluster.SetGenerateName(cDef.Template.GenerateName) + if ctrlutils.HasLabelWithValue(cr, clustersv1alpha1.RandomizeClusterNameLabel, "true") { + cluster.SetGenerateName(cDef.Template.GenerateName) + } else { + name, err := GenerateClusterName(cDef.Template.GenerateName, cr) + if err != nil { + return nil, fmt.Errorf("error generating cluster name: %w", err) + } + cluster.SetName(name) + } } else if cDef.Template.Name != "" { cluster.SetName(cDef.Template.Name) } else { @@ -401,10 +480,18 @@ func (r *ClusterScheduler) initializeNewCluster(ctx context.Context, cr *cluster } } else { // there might be multiple instances of this cluster + prefix := purpose + "-" if cDef.Template.GenerateName != "" { - cluster.SetGenerateName(cDef.Template.GenerateName) + prefix = cDef.Template.GenerateName + } + if ctrlutils.HasLabelWithValue(cr, clustersv1alpha1.RandomizeClusterNameLabel, "true") { + cluster.SetGenerateName(prefix) } else { - cluster.SetGenerateName(purpose + "-") + name, err := GenerateClusterName(prefix, cr) + if err != nil { + return nil, fmt.Errorf("error generating cluster name: %w", err) + } + cluster.SetName(name) } } // choose a namespace for the cluster @@ -428,7 +515,7 @@ func (r *ClusterScheduler) initializeNewCluster(ctx context.Context, cr *cluster } } cluster.SetAnnotations(cDef.Template.Annotations) - cluster.Spec = cDef.Template.Spec + cluster.Spec = *cDef.Template.Spec.DeepCopy() // set purpose, if not set if len(cluster.Spec.Purposes) == 0 { @@ -439,5 +526,15 @@ func (r *ClusterScheduler) initializeNewCluster(ctx context.Context, cr *cluster } } - return cluster + return cluster, nil +} + +// GenerateClusterName generates a deterministic name for a new Cluster based on a prefix and the corresponding ClusterRequest. +// The name will always contain the prefix, followed by a hash of the ClusterRequest's namespace and name. 
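+// For example, the same ClusterRequest always resolves to the same name of the form '<prefix><hash suffix>', so repeated scheduling attempts do not produce additional Clusters.
+// The suffix is shortened so that the resulting name does not exceed the maximum length allowed for Kubernetes object names.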
+func GenerateClusterName(prefix string, cr *clustersv1alpha1.ClusterRequest) (string, error) { + suffix, err := ctrlutils.ShortenToXCharacters(ctrlutils.NameHashSHAKE128Base32(cr.Namespace, cr.Name), ctrlutils.K8sMaxNameLength-len(prefix)) + if err != nil { + return "", err + } + return prefix + suffix, nil } From d936d4bbcebffe72546cedfb10c5b6f8a409a61f Mon Sep 17 00:00:00 2001 From: Johannes Aubart Date: Fri, 28 Nov 2025 11:06:07 +0100 Subject: [PATCH 3/5] add tests and fix bugs --- internal/controllers/scheduler/controller.go | 21 +++-- .../controllers/scheduler/controller_test.go | 91 +++++++++++++++++++ 2 files changed, 106 insertions(+), 6 deletions(-) diff --git a/internal/controllers/scheduler/controller.go b/internal/controllers/scheduler/controller.go index 54e5bc55..fecdfdae 100644 --- a/internal/controllers/scheduler/controller.go +++ b/internal/controllers/scheduler/controller.go @@ -288,7 +288,9 @@ func (r *ClusterScheduler) handleCreateOrUpdate(ctx context.Context, req reconci } } } - } else if !apierrors.IsNotFound(err) { + } else if apierrors.IsNotFound(err) { + existingCluster = nil + } else { rr.ReconcileError = errutils.WithReason(fmt.Errorf("error checking whether cluster '%s/%s' exists: %w", cluster.Namespace, cluster.Name, err), cconst.ReasonPlatformClusterInteractionProblem) return rr } @@ -300,7 +302,14 @@ func (r *ClusterScheduler) handleCreateOrUpdate(ctx context.Context, req reconci // create/update Cluster resource // Note that clusters are usually not updated. This should only happen if the status of a ClusterRequest was lost and an existing cluster is recovered. - if _, err := controllerutil.CreateOrUpdate(ctx, r.PlatformCluster.Client(), existingCluster, func() error { + if existingCluster == nil { + log.Info("Creating new cluster for request", "clusterName", cluster.Name, "clusterNamespace", cluster.Namespace) + if err := r.PlatformCluster.Client().Create(ctx, cluster); err != nil { + rr.ReconcileError = errutils.WithReason(fmt.Errorf("error creating cluster '%s/%s': %w", cluster.Namespace, cluster.Name, err), cconst.ReasonPlatformClusterInteractionProblem) + return rr + } + } else { + log.Info("Recovering existing cluster for request", "clusterName", cluster.Name, "clusterNamespace", cluster.Namespace) // merge finalizers, labels, and annotations from initialized cluster into existing one finSet := sets.New(existingCluster.Finalizers...) finSet.Delete(finalizersToRemove.UnsortedList()...) @@ -311,10 +320,10 @@ func (r *ClusterScheduler) handleCreateOrUpdate(ctx context.Context, req reconci // copy spec from initialized cluster existingCluster.Spec = cluster.Spec - return nil - }); err != nil { - rr.ReconcileError = errutils.WithReason(fmt.Errorf("error creating/updating cluster '%s/%s': %w", cluster.Namespace, cluster.Name, err), cconst.ReasonPlatformClusterInteractionProblem) - return rr + if err := r.PlatformCluster.Client().Update(ctx, existingCluster); err != nil { + rr.ReconcileError = errutils.WithReason(fmt.Errorf("error updating existing cluster '%s/%s': %w", existingCluster.Namespace, existingCluster.Name, err), cconst.ReasonPlatformClusterInteractionProblem) + return rr + } } } diff --git a/internal/controllers/scheduler/controller_test.go b/internal/controllers/scheduler/controller_test.go index 9820d70c..255f4b43 100644 --- a/internal/controllers/scheduler/controller_test.go +++ b/internal/controllers/scheduler/controller_test.go @@ -3,6 +3,7 @@ package scheduler_test import ( "fmt" "path/filepath" + "strings" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -489,7 +490,97 @@ var _ = Describe("Scheduler", func() { Expect(cr.Status.Cluster).ToNot(BeNil()) Expect(cr.Status.Cluster.Name).ToNot(Equal(c.Name), "Cluster is in deletion and should not be considered for scheduling") Expect(cr.Status.Cluster.Namespace).To(Equal(c.Namespace)) + }) + + It("should choose the Cluster name correctly", func() { + clusterNamespace := "exclusive" + sc, env := defaultTestSetup("testdata", "test-01") + Expect(sc.Config.Scope).To(Equal(config.SCOPE_NAMESPACED)) + Expect(env.Client().DeleteAllOf(env.Ctx, &clustersv1alpha1.Cluster{}, client.InNamespace(clusterNamespace))).To(Succeed()) + existingClusters := &clustersv1alpha1.ClusterList{} + Expect(env.Client().List(env.Ctx, existingClusters, client.InNamespace(clusterNamespace))).To(Succeed()) + Expect(existingClusters.Items).To(BeEmpty()) + + req := &clustersv1alpha1.ClusterRequest{} + Expect(env.Client().Get(env.Ctx, ctrlutils.ObjectKey("exclusive", "foo"), req)).To(Succeed()) + Expect(req.Status.Cluster).To(BeNil()) + + env.ShouldReconcile(testutils.RequestFromObject(req)) + Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req), req)).To(Succeed()) + Expect(req.Status.Cluster).ToNot(BeNil()) + + Expect(env.Client().List(env.Ctx, existingClusters, client.InNamespace(clusterNamespace))).To(Succeed()) + Expect(existingClusters.Items).To(HaveLen(1)) + cluster := existingClusters.Items[0] + Expect(cluster.Name).To(Equal(req.Status.Cluster.Name)) + expectedName, err := scheduler.GenerateClusterName(req.Spec.Purpose+"-", req) + Expect(err).ToNot(HaveOccurred()) + Expect(cluster.Name).To(Equal(expectedName)) + + req2 := &clustersv1alpha1.ClusterRequest{} + req2.SetName("exclusive-2") + req2.SetNamespace("foo") + req2.Spec.Purpose = "exclusive" + req2.Labels = map[string]string{ + clustersv1alpha1.RandomizeClusterNameLabel: "true", + } + Expect(env.Client().Create(env.Ctx, req2)).To(Succeed()) + + env.ShouldReconcile(testutils.RequestFromObject(req2)) + Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req2), req2)).To(Succeed()) + Expect(req2.Status.Cluster).ToNot(BeNil()) + expectedName2, err := scheduler.GenerateClusterName(req2.Spec.Purpose+"-", req2) + Expect(req2.Status.Cluster.Name).ToNot(Equal(expectedName2)) + Expect(strings.HasPrefix(req2.Status.Cluster.Name, req2.Spec.Purpose+"-")).To(BeTrue()) + + cluster2 := &clustersv1alpha1.Cluster{} + Expect(env.Client().Get(env.Ctx, ctrlutils.ObjectKey(req2.Status.Cluster.Name, req2.Status.Cluster.Namespace), cluster2)).To(Succeed()) + Expect(cluster2.GenerateName).To(Equal(req2.Spec.Purpose + "-")) + }) + + It("should recreate a deleted cluster for an existing request", func() { + clusterNamespace := "exclusive" + sc, env := defaultTestSetup("testdata", "test-01") + Expect(sc.Config.Scope).To(Equal(config.SCOPE_NAMESPACED)) + Expect(env.Client().DeleteAllOf(env.Ctx, &clustersv1alpha1.Cluster{}, client.InNamespace(clusterNamespace))).To(Succeed()) + existingClusters := &clustersv1alpha1.ClusterList{} + Expect(env.Client().List(env.Ctx, existingClusters, client.InNamespace(clusterNamespace))).To(Succeed()) + Expect(existingClusters.Items).To(BeEmpty()) + + req := &clustersv1alpha1.ClusterRequest{} + Expect(env.Client().Get(env.Ctx, ctrlutils.ObjectKey("exclusive", "foo"), req)).To(Succeed()) + Expect(req.Status.Cluster).To(BeNil()) + + env.ShouldReconcile(testutils.RequestFromObject(req)) + Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req), req)).To(Succeed()) + Expect(req.Status.Cluster).ToNot(BeNil()) + + 
Expect(env.Client().List(env.Ctx, existingClusters, client.InNamespace(clusterNamespace))).To(Succeed()) + Expect(existingClusters.Items).To(HaveLen(1)) + cluster := existingClusters.Items[0] + Expect(cluster.Name).To(Equal(req.Status.Cluster.Name)) + Expect(cluster.Namespace).To(Equal(clusterNamespace)) + + // remove all finalizers and delete the cluster + cluster.Finalizers = []string{} + Expect(env.Client().Update(env.Ctx, &cluster)).To(Succeed()) + Expect(env.Client().Delete(env.Ctx, &cluster)).To(Succeed()) + Eventually(func() bool { + err := env.Client().Get(env.Ctx, client.ObjectKeyFromObject(&cluster), &cluster) + return apierrors.IsNotFound(err) + }, 3).Should(BeTrue(), "Cluster should be deleted") + + // reconcile again, this should recreate the cluster + env.ShouldReconcile(testutils.RequestFromObject(req)) + Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req), req)).To(Succeed()) + Expect(req.Status.Cluster).ToNot(BeNil()) + Expect(env.Client().List(env.Ctx, existingClusters, client.InNamespace(clusterNamespace))).To(Succeed()) + Expect(existingClusters.Items).To(HaveLen(1)) + newCluster := existingClusters.Items[0] + Expect(newCluster.Name).To(Equal(req.Status.Cluster.Name)) + Expect(newCluster.Namespace).To(Equal(clusterNamespace)) + Expect(newCluster.Name).To(Equal(cluster.Name), "Recreated cluster should have the same name") }) }) From f17c784781be6f062e0981289559bef65ec5fc1e Mon Sep 17 00:00:00 2001 From: Johannes Aubart Date: Fri, 28 Nov 2025 11:20:30 +0100 Subject: [PATCH 4/5] implement recreation of lost clusters still referenced by ClusterRequests --- internal/controllers/scheduler/controller.go | 31 +++++++++++++++++-- .../controllers/scheduler/controller_test.go | 19 ++++++++++-- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/internal/controllers/scheduler/controller.go b/internal/controllers/scheduler/controller.go index fecdfdae..0592d343 100644 --- a/internal/controllers/scheduler/controller.go +++ b/internal/controllers/scheduler/controller.go @@ -136,10 +136,35 @@ func (r *ClusterScheduler) handleCreateOrUpdate(ctx context.Context, req reconci // check if request is already granted if cr.Status.Cluster != nil { - log.Info("Request already contains a cluster reference, nothing to do", "clusterName", cr.Status.Cluster.Name, "clusterNamespace", cr.Status.Cluster.Namespace) - return rr + // verify that the referenced cluster still exists + existingCluster := &clustersv1alpha1.Cluster{} + existingCluster.Namespace = cr.Status.Cluster.Namespace + existingCluster.Name = cr.Status.Cluster.Name + if err := r.PlatformCluster.Client().Get(ctx, client.ObjectKeyFromObject(existingCluster), existingCluster); err == nil { + fin := cr.FinalizerForCluster() + if !controllerutil.ContainsFinalizer(existingCluster, fin) { + log.Info("Referenced cluster does not contain expected finalizer, adding it", "clusterName", existingCluster.Name, "clusterNamespace", existingCluster.Namespace, "finalizer", fin) + oldCluster := existingCluster.DeepCopy() + controllerutil.AddFinalizer(existingCluster, fin) + if err := r.PlatformCluster.Client().Patch(ctx, existingCluster, client.MergeFrom(oldCluster)); err != nil { + rr.ReconcileError = errutils.WithReason(fmt.Errorf("error patching finalizer '%s' on cluster '%s/%s': %w", fin, existingCluster.Namespace, existingCluster.Name, err), cconst.ReasonPlatformClusterInteractionProblem) + return rr + } + } + log.Info("Request already contains a reference to an existing cluster, nothing to do", "clusterName", 
cr.Status.Cluster.Name, "clusterNamespace", cr.Status.Cluster.Namespace) + return rr + } else if !apierrors.IsNotFound(err) { + rr.ReconcileError = errutils.WithReason(fmt.Errorf("error checking whether cluster '%s/%s' exists: %w", existingCluster.Namespace, existingCluster.Name, err), cconst.ReasonPlatformClusterInteractionProblem) + return rr + } else { + log.Info("Referenced cluster does not exist, resetting request status to recreate it", "clusterName", existingCluster.Name, "clusterNamespace", existingCluster.Namespace) + rr.Object.Status.Cluster = nil + rr.Result.RequeueAfter = 1 + return rr + } + } else { + log.Debug("Request status does not contain a cluster reference, checking for existing clusters with referencing finalizers") } - log.Debug("Request status does not contain a cluster reference, checking for existing clusters with referencing finalizers") // fetch cluster definition purpose := cr.Spec.Purpose diff --git a/internal/controllers/scheduler/controller_test.go b/internal/controllers/scheduler/controller_test.go index 255f4b43..b35f9bac 100644 --- a/internal/controllers/scheduler/controller_test.go +++ b/internal/controllers/scheduler/controller_test.go @@ -538,7 +538,7 @@ var _ = Describe("Scheduler", func() { Expect(cluster2.GenerateName).To(Equal(req2.Spec.Purpose + "-")) }) - It("should recreate a deleted cluster for an existing request", func() { + It("should restore a missing finalizer and recreate a deleted cluster for an existing request", func() { clusterNamespace := "exclusive" sc, env := defaultTestSetup("testdata", "test-01") Expect(sc.Config.Scope).To(Equal(config.SCOPE_NAMESPACED)) @@ -561,6 +561,14 @@ var _ = Describe("Scheduler", func() { Expect(cluster.Name).To(Equal(req.Status.Cluster.Name)) Expect(cluster.Namespace).To(Equal(clusterNamespace)) + // remove finalizer from cluster and reconcile, which should add it again + cluster.Finalizers = []string{} + Expect(env.Client().Update(env.Ctx, &cluster)).To(Succeed()) + Expect(cluster.Finalizers).To(BeEmpty()) + env.ShouldReconcile(testutils.RequestFromObject(req)) + Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(&cluster), &cluster)).To(Succeed()) + Expect(cluster.Finalizers).To(ContainElements(req.FinalizerForCluster()), "Finalizer should be restored on cluster") + // remove all finalizers and delete the cluster cluster.Finalizers = []string{} Expect(env.Client().Update(env.Ctx, &cluster)).To(Succeed()) @@ -570,7 +578,14 @@ var _ = Describe("Scheduler", func() { return apierrors.IsNotFound(err) }, 3).Should(BeTrue(), "Cluster should be deleted") - // reconcile again, this should recreate the cluster + // reconciliation should remove the request status and set it to pending + rr := env.ShouldReconcile(testutils.RequestFromObject(req)) + Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req), req)).To(Succeed()) + Expect(req.Status.Cluster).To(BeNil()) + Expect(req.Status.Phase).To(Equal(clustersv1alpha1.REQUEST_PENDING)) + Expect(rr.RequeueAfter).ToNot(BeZero(), "RequeueAfter should be set") + + // reconcile again, this should re-create the cluster env.ShouldReconcile(testutils.RequestFromObject(req)) Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req), req)).To(Succeed()) Expect(req.Status.Cluster).ToNot(BeNil()) From f41d3b5c944599e8ef1789d372f2755ac133ec4d Mon Sep 17 00:00:00 2001 From: Johannes Aubart Date: Fri, 28 Nov 2025 11:36:35 +0100 Subject: [PATCH 5/5] satisfy linter --- internal/controllers/scheduler/controller.go | 1 + 
internal/controllers/scheduler/controller_test.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/internal/controllers/scheduler/controller.go b/internal/controllers/scheduler/controller.go index 0592d343..a2756a79 100644 --- a/internal/controllers/scheduler/controller.go +++ b/internal/controllers/scheduler/controller.go @@ -116,6 +116,7 @@ func (r *ClusterScheduler) reconcile(ctx context.Context, req reconcile.Request) return rr } +//nolint:gocyclo func (r *ClusterScheduler) handleCreateOrUpdate(ctx context.Context, req reconcile.Request, cr *clustersv1alpha1.ClusterRequest) ReconcileResult { log := logging.FromContextOrPanic(ctx) rr := ReconcileResult{ diff --git a/internal/controllers/scheduler/controller_test.go b/internal/controllers/scheduler/controller_test.go index b35f9bac..925d2232 100644 --- a/internal/controllers/scheduler/controller_test.go +++ b/internal/controllers/scheduler/controller_test.go @@ -1,3 +1,4 @@ +//nolint:goconst package scheduler_test import ( @@ -530,6 +531,7 @@ var _ = Describe("Scheduler", func() { Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req2), req2)).To(Succeed()) Expect(req2.Status.Cluster).ToNot(BeNil()) expectedName2, err := scheduler.GenerateClusterName(req2.Spec.Purpose+"-", req2) + Expect(err).ToNot(HaveOccurred()) Expect(req2.Status.Cluster.Name).ToNot(Equal(expectedName2)) Expect(strings.HasPrefix(req2.Status.Cluster.Name, req2.Spec.Purpose+"-")).To(BeTrue())