Skip to content

Commit f2c01bf

Browse files
committed
feat: add node auto repair configuration for EKS managed node groups
1 parent 46ee041 commit f2c01bf

File tree

9 files changed

+152
-0
lines changed

9 files changed

+152
-0
lines changed

config/crd/bases/infrastructure.cluster.x-k8s.io_awsmanagedmachinepools.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,17 @@ spec:
987987
- name
988988
type: object
989989
type: array
990+
nodeRepairConfig:
991+
description: NodeRepairConfig specifies the node auto repair configuration
992+
for the managed node group.
993+
properties:
994+
enabled:
995+
default: false
996+
description: |-
997+
Enabled specifies whether node auto repair is enabled for the node group.
998+
When enabled, EKS will automatically repair unhealthy nodes by replacing them.
999+
type: boolean
1000+
type: object
9901001
providerIDList:
9911002
description: |-
9921003
ProviderIDList are the provider IDs of instances in the

exp/api/v1beta1/conversion.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ func (src *AWSManagedMachinePool) ConvertTo(dstRaw conversion.Hub) error {
149149
dst.Spec.RolePath = restored.Spec.RolePath
150150
dst.Spec.RolePermissionsBoundary = restored.Spec.RolePermissionsBoundary
151151

152+
if restored.Spec.NodeRepairConfig != nil {
153+
dst.Spec.NodeRepairConfig = restored.Spec.NodeRepairConfig
154+
}
155+
152156
return nil
153157
}
154158

exp/api/v1beta1/zz_generated.conversion.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exp/api/v1beta2/awsmanagedmachinepool_types.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,10 @@ type AWSManagedMachinePoolSpec struct {
214214
// AWSLifecycleHooks specifies lifecycle hooks for the managed node group.
215215
// +optional
216216
AWSLifecycleHooks []AWSLifecycleHook `json:"lifecycleHooks,omitempty"`
217+
218+
// NodeRepairConfig specifies the node auto repair configuration for the managed node group.
219+
// +optional
220+
NodeRepairConfig *NodeRepairConfig `json:"nodeRepairConfig,omitempty"`
217221
}
218222

219223
// ManagedMachinePoolScaling specifies scaling options.
@@ -297,6 +301,15 @@ type AWSManagedMachinePoolStatus struct {
297301
Conditions clusterv1.Conditions `json:"conditions,omitempty"`
298302
}
299303

304+
// NodeRepairConfig defines the node auto repair configuration for managed node groups.
// Its only setting, Enabled, is an optional pointer whose kubebuilder default marker is false.
305+
type NodeRepairConfig struct {
306+
// Enabled specifies whether node auto repair is enabled for the node group.
307+
// When enabled, EKS will automatically repair unhealthy nodes by replacing them.
308+
// +optional
309+
// +kubebuilder:default=false
310+
Enabled *bool `json:"enabled,omitempty"`
311+
}
312+
300313
// +kubebuilder:object:root=true
301314
// +kubebuilder:resource:path=awsmanagedmachinepools,scope=Namespaced,categories=cluster-api,shortName=awsmmp
302315
// +kubebuilder:storageversion

exp/api/v1beta2/zz_generated.deepcopy.go

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/cloud/converters/eks.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,20 @@ func NodegroupUpdateconfigFromSDK(ngUpdateConfig *ekstypes.NodegroupUpdateConfig
217217
return converted
218218
}
219219

220+
// NodeRepairConfigToSDK is used to convert a CAPA NodeRepairConfig to AWS SDK NodeRepairConfig.
221+
func NodeRepairConfigToSDK(repairConfig *expinfrav1.NodeRepairConfig) *ekstypes.NodeRepairConfig {
222+
if repairConfig == nil {
223+
// Default to disabled if not specified to avoid behavior changes
224+
return &ekstypes.NodeRepairConfig{
225+
Enabled: aws.Bool(false),
226+
}
227+
}
228+
229+
return &ekstypes.NodeRepairConfig{
230+
Enabled: repairConfig.Enabled,
231+
}
232+
}
233+
220234
// AMITypeToSDK converts a CAPA ManagedMachineAMIType to AWS SDK AMIType.
221235
func AMITypeToSDK(amiType expinfrav1.ManagedMachineAMIType) ekstypes.AMITypes {
222236
switch amiType {

pkg/cloud/converters/eks_test.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package converters
18+
19+
import (
20+
"testing"
21+
22+
"github.com/aws/aws-sdk-go-v2/aws"
23+
ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
24+
"github.com/google/go-cmp/cmp"
25+
"github.com/google/go-cmp/cmp/cmpopts"
26+
27+
expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
28+
)
29+
30+
func TestNodeRepairConfigToSDK(t *testing.T) {
31+
tests := []struct {
32+
name string
33+
input *expinfrav1.NodeRepairConfig
34+
expected *ekstypes.NodeRepairConfig
35+
}{
36+
{
37+
name: "nil input returns default disabled",
38+
input: nil,
39+
expected: &ekstypes.NodeRepairConfig{Enabled: aws.Bool(false)},
40+
},
41+
{
42+
name: "enabled repair config",
43+
input: &expinfrav1.NodeRepairConfig{
44+
Enabled: aws.Bool(true),
45+
},
46+
expected: &ekstypes.NodeRepairConfig{Enabled: aws.Bool(true)},
47+
},
48+
{
49+
name: "disabled repair config",
50+
input: &expinfrav1.NodeRepairConfig{
51+
Enabled: aws.Bool(false),
52+
},
53+
expected: &ekstypes.NodeRepairConfig{Enabled: aws.Bool(false)},
54+
},
55+
}
56+
57+
for _, tt := range tests {
58+
t.Run(tt.name, func(t *testing.T) {
59+
result := NodeRepairConfigToSDK(tt.input)
60+
if !cmp.Equal(result, tt.expected, cmpopts.IgnoreUnexported(ekstypes.NodeRepairConfig{})) {
61+
t.Errorf("NodeRepairConfigToSDK() diff (-want +got):\n%s", cmp.Diff(tt.expected, result, cmpopts.IgnoreUnexported(ekstypes.NodeRepairConfig{})))
62+
}
63+
})
64+
}
65+
}

pkg/cloud/services/eks/nodegroup.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
2929
iamtypes "github.com/aws/aws-sdk-go-v2/service/iam/types"
3030
"github.com/google/go-cmp/cmp"
31+
"github.com/google/go-cmp/cmp/cmpopts"
3132
"github.com/pkg/errors"
3233
"k8s.io/apimachinery/pkg/util/version"
3334

@@ -119,6 +120,11 @@ func (s *NodegroupService) updateConfig() (*ekstypes.NodegroupUpdateConfig, erro
119120
return converters.NodegroupUpdateconfigToSDK(updateConfig)
120121
}
121122

123+
func (s *NodegroupService) nodeRepairConfig() *ekstypes.NodeRepairConfig {
124+
repairConfig := s.scope.ManagedMachinePool.Spec.NodeRepairConfig
125+
return converters.NodeRepairConfigToSDK(repairConfig)
126+
}
127+
122128
func (s *NodegroupService) roleArn(ctx context.Context) (*string, error) {
123129
var role *iamtypes.Role
124130
if s.scope.RoleName() != "" {
@@ -249,6 +255,9 @@ func (s *NodegroupService) createNodegroup(ctx context.Context) (*ekstypes.Nodeg
249255
Version: s.scope.ManagedMachinePool.Status.LaunchTemplateVersion,
250256
}
251257
}
258+
if managedPool.NodeRepairConfig != nil {
259+
input.NodeRepairConfig = s.nodeRepairConfig()
260+
}
252261

253262
out, err := s.EKSClient.CreateNodegroup(ctx, input)
254263
if err != nil {
@@ -480,6 +489,14 @@ func (s *NodegroupService) reconcileNodegroupConfig(ctx context.Context, ng *eks
480489
input.UpdateConfig = updatedConfig
481490
needsUpdate = true
482491
}
492+
493+
specRepairConfig := s.nodeRepairConfig()
494+
if !cmp.Equal(ng.NodeRepairConfig, specRepairConfig, cmpopts.IgnoreUnexported(ekstypes.NodeRepairConfig{})) {
495+
s.Debug("Nodegroup repair configuration differs from spec, updating the nodegroup repair config", "nodegroup", ng.NodegroupName)
496+
input.NodeRepairConfig = specRepairConfig
497+
needsUpdate = true
498+
}
499+
483500
if !needsUpdate {
484501
s.Debug("node group config update not needed", "cluster", eksClusterName, "name", *ng.NodegroupName)
485502
return nil

test/e2e/data/eks/cluster-template-eks-managed-machinepool-only.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,5 @@ spec:
2525
maxSize: 2
2626
updateConfig:
2727
maxUnavailable: 2
28+
nodeRepairConfig:
29+
enabled: false

0 commit comments

Comments
 (0)