diff --git a/Makefile b/Makefile index 7194ba75c71d..5c50caff88fb 100644 --- a/Makefile +++ b/Makefile @@ -603,6 +603,7 @@ generate-e2e-templates-v1.11: $(KUSTOMIZE) generate-e2e-templates-main: $(KUSTOMIZE) $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-md-remediation --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-md-remediation.yaml + $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-md-taints --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-md-taints.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-remediation --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-remediation.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption/step1 --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml echo "---" >> $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml diff --git a/docs/book/src/tasks/experimental-features/experimental-features.md b/docs/book/src/tasks/experimental-features/experimental-features.md index a74b4e0c2420..15b20b97e618 100644 --- a/docs/book/src/tasks/experimental-features/experimental-features.md +++ b/docs/book/src/tasks/experimental-features/experimental-features.md @@ -16,6 +16,7 @@ Currently Cluster API has the following experimental features: * `KubeadmBootstrapFormatIgnition` (env var: `EXP_KUBEADM_BOOTSTRAP_FORMAT_IGNITION`): [Ignition](./ignition.md) * `MachineTaintPropagation` (env var: `EXP_MACHINE_TAINT_PROPAGATION`): * Allows in-place propagation of taints to nodes using the taint fields within Machines, MachineSets, and MachineDeployments. + * In the future, this feature is also planned to cover topology clusters and KCP. 
See the proposal [Propagating taints from Cluster API to Nodes](https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20250513-propogate-taints.md) for more information. ## Enabling Experimental Features for Management Clusters Started with clusterctl diff --git a/docs/proposals/20250513-propogate-taints.md b/docs/proposals/20250513-propogate-taints.md index 148ae6b84d7b..73c381b331bc 100644 --- a/docs/proposals/20250513-propogate-taints.md +++ b/docs/proposals/20250513-propogate-taints.md @@ -556,6 +556,7 @@ This allows to gain experience and the ability to do adjustments and graduate th - [ ] 2025-06-06: Open proposal PR - [ ] 2025-10-13: Reworked the proposal based on feedback - [ ] 2025-10-21: Review feedback and discussions +- [ ] 2025-11-12: Implementation for Machine, MachineSet and MachineDeployments [community meeting]: https://docs.google.com/document/d/1ushaVqAKYnZ2VN_aa3GyKlS4kEd6bSug13xaXOakAQI/edit#heading=h.pxsq37pzkbdq diff --git a/test/e2e/config/docker.yaml b/test/e2e/config/docker.yaml index d7d914b123ae..a6fe66837f96 100644 --- a/test/e2e/config/docker.yaml +++ b/test/e2e/config/docker.yaml @@ -332,6 +332,7 @@ providers: # Add cluster templates - sourcePath: "../data/infrastructure-docker/main/cluster-template.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-md-remediation.yaml" + - sourcePath: "../data/infrastructure-docker/main/cluster-template-md-taints.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-remediation.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-adoption.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-machine-pool.yaml" @@ -400,6 +401,7 @@ variables: EXP_MACHINE_SET_PREFLIGHT_CHECKS: "true" EXP_PRIORITY_QUEUE: "false" EXP_IN_PLACE_UPDATES: "true" + EXP_MACHINE_TAINT_PROPAGATION: "true" CAPI_DIAGNOSTICS_ADDRESS: ":8080" CAPI_INSECURE_DIAGNOSTICS: "true" diff --git 
a/test/e2e/data/infrastructure-docker/main/cluster-template-md-taints/kustomization.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-md-taints/kustomization.yaml new file mode 100644 index 000000000000..75efc26cec88 --- /dev/null +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-md-taints/kustomization.yaml @@ -0,0 +1,7 @@ +resources: +- ../bases/cluster-with-kcp.yaml +- ../bases/md.yaml +- ../bases/crs.yaml + +patches: +- path: md-taints.yaml diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-md-taints/md-taints.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-md-taints/md-taints.yaml new file mode 100644 index 000000000000..610de62660f7 --- /dev/null +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-md-taints/md-taints.yaml @@ -0,0 +1,16 @@ +apiVersion: cluster.x-k8s.io/v1beta2 +kind: MachineDeployment +metadata: + name: "${CLUSTER_NAME}-md-0" +spec: + template: + spec: + taints: + - key: "pre-existing-on-initialization-taint" + value: "on-initialization-value" + effect: PreferNoSchedule + propagation: OnInitialization + - key: "pre-existing-always-taint" + value: "always-value" + effect: PreferNoSchedule + propagation: Always diff --git a/test/e2e/md_rollout.go b/test/e2e/md_rollout.go index a08520422fb5..29f3469785b1 100644 --- a/test/e2e/md_rollout.go +++ b/test/e2e/md_rollout.go @@ -21,15 +21,19 @@ import ( "fmt" "os" "path/filepath" + "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/clusterctl" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/patch" ) // MachineDeploymentRolloutSpecInput is the input for MachineDeploymentRolloutSpec. 
@@ -135,6 +139,147 @@ func MachineDeploymentRolloutSpec(ctx context.Context, inputGetter func() Machin Namespace: clusterResources.Cluster.Namespace, }) + // Get all machines before doing in-place changes so we can check at the end that no machines were replaced. + machinesBeforeInPlaceChanges := getMachinesByCluster(ctx, input.BootstrapClusterProxy.GetClient(), clusterResources.Cluster) + + preExistingAlwaysTaint := clusterv1.MachineTaint{ + Key: "pre-existing-always-taint", + Value: "always-value", + Effect: corev1.TaintEffectPreferNoSchedule, + Propagation: clusterv1.MachineTaintPropagationAlways, + } + + preExistingOnInitializationTaint := clusterv1.MachineTaint{ + Key: "pre-existing-on-initialization-taint", + Value: "on-initialization-value", + Effect: corev1.TaintEffectPreferNoSchedule, + Propagation: clusterv1.MachineTaintPropagationOnInitialization, + } + addingAlwaysTaint := clusterv1.MachineTaint{ + Key: "added-always-taint", + Value: "added-always-value", + Effect: corev1.TaintEffectPreferNoSchedule, + Propagation: clusterv1.MachineTaintPropagationAlways, + } + + addingOnInitializationTaint := clusterv1.MachineTaint{ + Key: "added-on-initialization-taint", + Value: "added-on-initialization-value", + Effect: corev1.TaintEffectPreferNoSchedule, + Propagation: clusterv1.MachineTaintPropagationOnInitialization, + } + + wantMachineTaints := []clusterv1.MachineTaint{ + preExistingAlwaysTaint, + preExistingOnInitializationTaint, + } + wantNodeTaints := toCoreV1Taints( + preExistingAlwaysTaint, + preExistingOnInitializationTaint, + ) + + Byf("Verify MachineDeployment Machines and Nodes have the correct taints") + wlClient := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, clusterResources.Cluster.Namespace, clusterResources.Cluster.Name).GetClient() + verifyMachineAndNodeTaints(ctx, verifyMachineAndNodeTaintsInput{ + BootstrapClusterClient: input.BootstrapClusterProxy.GetClient(), + WorkloadClusterClient: wlClient, + ClusterName: 
clusterResources.Cluster.Name, + MachineDeployments: clusterResources.MachineDeployments, + MachineTaints: wantMachineTaints, + NodeTaints: wantNodeTaints, + }) + + Byf("Verify in-place propagation by adding new taints to the MachineDeployment") + wantMachineTaints = []clusterv1.MachineTaint{ + preExistingAlwaysTaint, + preExistingOnInitializationTaint, + addingAlwaysTaint, + addingOnInitializationTaint, + } + wantNodeTaints = toCoreV1Taints( + preExistingAlwaysTaint, + preExistingOnInitializationTaint, + addingAlwaysTaint, + ) + for _, md := range clusterResources.MachineDeployments { + patchHelper, err := patch.NewHelper(md, input.BootstrapClusterProxy.GetClient()) + Expect(err).ToNot(HaveOccurred()) + md.Spec.Template.Spec.Taints = wantMachineTaints + Expect(patchHelper.Patch(ctx, md)).To(Succeed()) + } + + verifyMachineAndNodeTaints(ctx, verifyMachineAndNodeTaintsInput{ + BootstrapClusterClient: input.BootstrapClusterProxy.GetClient(), + WorkloadClusterClient: wlClient, + ClusterName: clusterResources.Cluster.Name, + MachineDeployments: clusterResources.MachineDeployments, + MachineTaints: wantMachineTaints, + NodeTaints: wantNodeTaints, + }) + + Byf("Verify in-place propagation when removing preExisting Always and OnInitialization taint from the nodes") + nodes := corev1.NodeList{} + Expect(wlClient.List(ctx, &nodes)).To(Succeed()) + // Remove the initial taints from the nodes. 
+ for _, node := range nodes.Items { + patchHelper, err := patch.NewHelper(&node, wlClient) + Expect(err).ToNot(HaveOccurred()) + newTaints := []corev1.Taint{} + for _, taint := range node.Spec.Taints { + if taint.Key == preExistingAlwaysTaint.Key { + continue + } + if taint.Key == preExistingOnInitializationTaint.Key { + continue + } + newTaints = append(newTaints, taint) + } + node.Spec.Taints = newTaints + Expect(patchHelper.Patch(ctx, &node)).To(Succeed()) + } + + wantNodeTaints = toCoreV1Taints( + preExistingAlwaysTaint, + addingAlwaysTaint, + ) + + verifyMachineAndNodeTaints(ctx, verifyMachineAndNodeTaintsInput{ + BootstrapClusterClient: input.BootstrapClusterProxy.GetClient(), + WorkloadClusterClient: wlClient, + ClusterName: clusterResources.Cluster.Name, + MachineDeployments: clusterResources.MachineDeployments, + MachineTaints: wantMachineTaints, + NodeTaints: wantNodeTaints, + }) + + Byf("Verify in-place propagation by removing taints from the MachineDeployment") + wantMachineTaints = []clusterv1.MachineTaint{ + preExistingOnInitializationTaint, + addingOnInitializationTaint, + } + wantNodeTaints = toCoreV1Taints() + for _, md := range clusterResources.MachineDeployments { + patchHelper, err := patch.NewHelper(md, input.BootstrapClusterProxy.GetClient()) + Expect(err).ToNot(HaveOccurred()) + md.Spec.Template.Spec.Taints = wantMachineTaints + Expect(patchHelper.Patch(ctx, md)).To(Succeed()) + } + + verifyMachineAndNodeTaints(ctx, verifyMachineAndNodeTaintsInput{ + BootstrapClusterClient: input.BootstrapClusterProxy.GetClient(), + WorkloadClusterClient: wlClient, + ClusterName: clusterResources.Cluster.Name, + MachineDeployments: clusterResources.MachineDeployments, + MachineTaints: wantMachineTaints, + NodeTaints: wantNodeTaints, + }) + + By("Verifying there are no unexpected rollouts through in-place changes") + Consistently(func(g Gomega) { + machinesAfterInPlaceChanges := getMachinesByCluster(ctx, input.BootstrapClusterProxy.GetClient(), 
clusterResources.Cluster) + g.Expect(machinesAfterInPlaceChanges.Equal(machinesBeforeInPlaceChanges)).To(BeTrue(), "Machines must not be replaced through in-place rollout") + }, 30*time.Second, 1*time.Second).Should(Succeed()) + By("PASSED!") }) @@ -143,3 +288,52 @@ func MachineDeploymentRolloutSpec(ctx context.Context, inputGetter func() Machin framework.DumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ClusterctlConfigPath, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup) }) } + +type verifyMachineAndNodeTaintsInput struct { + BootstrapClusterClient client.Client + WorkloadClusterClient client.Client + ClusterName string + MachineDeployments []*clusterv1.MachineDeployment + MachineTaints []clusterv1.MachineTaint + NodeTaints []corev1.Taint +} + +func verifyMachineAndNodeTaints(ctx context.Context, input verifyMachineAndNodeTaintsInput) { + Expect(ctx).NotTo(BeNil(), "ctx is required for verifyMachineAndNodeTaints") + Expect(input.BootstrapClusterClient).ToNot(BeNil(), "Invalid argument. input.BootstrapClusterClient can't be nil when calling verifyMachineAndNodeTaints") + Expect(input.WorkloadClusterClient).ToNot(BeNil(), "Invalid argument. input.WorkloadClusterClient can't be nil when calling verifyMachineAndNodeTaints") + Expect(input.ClusterName).NotTo(BeEmpty(), "Invalid argument. input.ClusterName can't be empty when calling verifyMachineAndNodeTaints") + Expect(input.MachineDeployments).NotTo(BeNil(), "Invalid argument. 
input.MachineDeployments can't be nil when calling verifyMachineAndNodeTaints") + + Eventually(func(g Gomega) { + for _, md := range input.MachineDeployments { + machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{ + Lister: input.BootstrapClusterClient, + ClusterName: input.ClusterName, + Namespace: md.Namespace, + MachineDeployment: *md, + }) + g.Expect(machines).To(HaveLen(int(ptr.Deref(md.Spec.Replicas, 0)))) + for _, machine := range machines { + g.Expect(machine.Spec.Taints).To(ConsistOf(input.MachineTaints)) + g.Expect(machine.Status.NodeRef.IsDefined()).To(BeTrue()) + + node := &corev1.Node{} + g.Expect(input.WorkloadClusterClient.Get(ctx, client.ObjectKey{Name: machine.Status.NodeRef.Name}, node)).To(Succeed()) + g.Expect(node.Spec.Taints).To(ConsistOf(input.NodeTaints)) + } + } + }, "1m").Should(Succeed()) +} + +func toCoreV1Taints(machineTaints ...clusterv1.MachineTaint) []corev1.Taint { + taints := []corev1.Taint{} + for _, machineTaint := range machineTaints { + taints = append(taints, corev1.Taint{ + Key: machineTaint.Key, + Value: machineTaint.Value, + Effect: machineTaint.Effect, + }) + } + return taints +} diff --git a/test/e2e/md_rollout_test.go b/test/e2e/md_rollout_test.go index b8ceb49ec155..8c326f771f98 100644 --- a/test/e2e/md_rollout_test.go +++ b/test/e2e/md_rollout_test.go @@ -23,13 +23,14 @@ import ( . "github.com/onsi/ginkgo/v2" ) -var _ = Describe("When testing MachineDeployment rolling upgrades", func() { +var _ = Describe("When testing MachineDeployment rolling upgrades and in-place taint propagation", func() { MachineDeploymentRolloutSpec(ctx, func() MachineDeploymentRolloutSpecInput { return MachineDeploymentRolloutSpecInput{ E2EConfig: e2eConfig, ClusterctlConfigPath: clusterctlConfigPath, BootstrapClusterProxy: bootstrapClusterProxy, ArtifactFolder: artifactFolder, + Flavor: "md-taints", SkipCleanup: skipCleanup, } })