Skip to content

Commit e860dc3

Browse files
committed
nodeclass add hash
Signed-off-by: helen <helenfrank@protonmail.com>
1 parent 1a59486 commit e860dc3

File tree

3 files changed

+137
-0
lines changed

3 files changed

+137
-0
lines changed

pkg/apis/v1alpha1/ecsnodeclass.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@ limitations under the License.
1717
package v1alpha1
1818

1919
import (
20+
"fmt"
2021
"log"
2122
"strings"
2223

24+
"github.com/mitchellh/hashstructure/v2"
2325
"github.com/samber/lo"
2426
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2527
)
@@ -206,6 +208,22 @@ type ECSNodeClass struct {
206208
Status ECSNodeClassStatus `json:"status,omitempty"`
207209
}
208210

211+
// ECSNodeClassHashVersion must be bumped whenever an update to the ECSNodeClass CRD meets any of these conditions:
212+
// 1. The default value of an existing field that is already hashed changes
213+
// 2. A field is added to the hash calculation with an already-set value
214+
// 3. A field is removed from the hash calculation
215+
const ECSNodeClassHashVersion = "v1"
216+
217+
func (in *ECSNodeClass) Hash() string {
218+
return fmt.Sprint(lo.Must(hashstructure.Hash([]interface{}{
219+
in.Spec,
220+
}, hashstructure.FormatV2, &hashstructure.HashOptions{
221+
SlicesAsSets: true,
222+
IgnoreZeroValue: true,
223+
ZeroNil: true,
224+
})))
225+
}
226+
209227
// ImageFamily If an alias is specified, return alias, or be 'Custom' (enforced via validation).
210228
func (in *ECSNodeClass) ImageFamily() string {
211229
if term, ok := lo.Find(in.Spec.ImageSelectorTerms, func(t ImageSelectorTerm) bool {

pkg/controllers/controllers.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828

2929
nodeclaimgarbagecollection "github.com/cloudpilot-ai/karpenter-provider-alicloud/pkg/controllers/nodeclaim/garbagecollection"
3030
nodeclaimtagging "github.com/cloudpilot-ai/karpenter-provider-alicloud/pkg/controllers/nodeclaim/tagging"
31+
nodeclasshash "github.com/cloudpilot-ai/karpenter-provider-alicloud/pkg/controllers/nodeclass/hash"
3132
controllerspricing "github.com/cloudpilot-ai/karpenter-provider-alicloud/pkg/controllers/providers/pricing"
3233
"github.com/cloudpilot-ai/karpenter-provider-alicloud/pkg/providers/instance"
3334
"github.com/cloudpilot-ai/karpenter-provider-alicloud/pkg/providers/pricing"
@@ -37,6 +38,7 @@ func NewControllers(ctx context.Context, mgr manager.Manager, clk clock.Clock, k
3738
cloudProvider cloudprovider.CloudProvider, instanceProvider instance.Provider, pricingProvider pricing.Provider) []controller.Controller {
3839

3940
controllers := []controller.Controller{
41+
nodeclasshash.NewController(kubeClient),
4042
controllerspricing.NewController(pricingProvider),
4143
nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider),
4244
nodeclaimtagging.NewController(kubeClient, instanceProvider),
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/*
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License.
13+
*/
14+
15+
package hash
16+
17+
import (
18+
"context"
19+
20+
"github.com/awslabs/operatorpkg/reasonable"
21+
"github.com/samber/lo"
22+
"go.uber.org/multierr"
23+
"k8s.io/apimachinery/pkg/api/equality"
24+
controllerruntime "sigs.k8s.io/controller-runtime"
25+
"sigs.k8s.io/controller-runtime/pkg/client"
26+
"sigs.k8s.io/controller-runtime/pkg/controller"
27+
"sigs.k8s.io/controller-runtime/pkg/manager"
28+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
29+
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
30+
"sigs.k8s.io/karpenter/pkg/operator/injection"
31+
32+
"github.com/cloudpilot-ai/karpenter-provider-alicloud/pkg/apis/v1alpha1"
33+
)
34+
35+
type Controller struct {
36+
kubeClient client.Client
37+
}
38+
39+
func NewController(kubeClient client.Client) *Controller {
40+
return &Controller{
41+
kubeClient: kubeClient,
42+
}
43+
}
44+
45+
func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1alpha1.ECSNodeClass) (reconcile.Result, error) {
46+
ctx = injection.WithControllerName(ctx, "nodeclass.hash")
47+
48+
stored := nodeClass.DeepCopy()
49+
50+
if nodeClass.Annotations[v1alpha1.AnnotationECSNodeClassHashVersion] != v1alpha1.ECSNodeClassHashVersion {
51+
if err := c.updateNodeClaimHash(ctx, nodeClass); err != nil {
52+
return reconcile.Result{}, err
53+
}
54+
}
55+
nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{
56+
v1alpha1.AnnotationECSNodeClassHash: nodeClass.Hash(),
57+
v1alpha1.AnnotationECSNodeClassHashVersion: v1alpha1.ECSNodeClassHashVersion,
58+
})
59+
60+
if !equality.Semantic.DeepEqual(stored, nodeClass) {
61+
if err := c.kubeClient.Patch(ctx, nodeClass, client.MergeFrom(stored)); err != nil {
62+
return reconcile.Result{}, err
63+
}
64+
}
65+
66+
return reconcile.Result{}, nil
67+
}
68+
69+
func (c *Controller) Register(_ context.Context, m manager.Manager) error {
70+
return controllerruntime.NewControllerManagedBy(m).
71+
Named("nodeclass.hash").
72+
For(&v1alpha1.ECSNodeClass{}).
73+
WithOptions(controller.Options{
74+
RateLimiter: reasonable.RateLimiter(),
75+
MaxConcurrentReconciles: 10,
76+
}).
77+
Complete(reconcile.AsReconciler(m.GetClient(), c))
78+
}
79+
80+
// Updating `ecsnodeclass-hash-version` annotation inside the karpenter controller means a breaking change has been made to the hash calculation.
81+
// `ecsnodeclass-hash` annotation on the ecsNodeClass will be updated, due to the breaking change, making the `ecsnodeclass-hash` on the NodeClaim different from
82+
// ecsNodeClass. Since, we cannot rely on the `ecsnodeclass-hash` on the NodeClaims, due to the breaking change, we will need to re-calculate the hash and update the annotation.
83+
// For more information on the Drift Hash Versioning: https://github.com/kubernetes-sigs/karpenter/blob/main/designs/drift-hash-versioning.md
84+
func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1alpha1.ECSNodeClass) error {
85+
ncList := &karpv1.NodeClaimList{}
86+
if err := c.kubeClient.List(ctx, ncList, client.MatchingFields{"spec.nodeClassRef.name": nodeClass.Name}); err != nil {
87+
return err
88+
}
89+
90+
errs := make([]error, len(ncList.Items))
91+
for i := range ncList.Items {
92+
nc := ncList.Items[i]
93+
stored := nc.DeepCopy()
94+
95+
if nc.Annotations[v1alpha1.AnnotationECSNodeClassHashVersion] != v1alpha1.ECSNodeClassHashVersion {
96+
nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
97+
v1alpha1.AnnotationECSNodeClassHashVersion: v1alpha1.ECSNodeClassHashVersion,
98+
})
99+
100+
// Any NodeClaim that is already drifted will remain drifted if the karpenter.k8s.aws/nodepool-hash-version doesn't match
101+
// Since the hashing mechanism has changed we will not be able to determine if the drifted status of the NodeClaim has changed
102+
if nc.StatusConditions().Get(karpv1.ConditionTypeDrifted) == nil {
103+
nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
104+
v1alpha1.AnnotationECSNodeClassHash: nodeClass.Hash(),
105+
})
106+
}
107+
108+
if !equality.Semantic.DeepEqual(stored, nc) {
109+
if err := c.kubeClient.Patch(ctx, &nc, client.MergeFrom(stored)); err != nil {
110+
errs[i] = client.IgnoreNotFound(err)
111+
}
112+
}
113+
}
114+
}
115+
116+
return multierr.Combine(errs...)
117+
}

0 commit comments

Comments
 (0)