Skip to content

Commit aa31a19

Browse files
authored
[Feature] Add tolerations runtime rotation (#1214)
1 parent f3bcc21 commit aa31a19

File tree

16 files changed

+456
-120
lines changed

16 files changed

+456
-120
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
- (Bugfix) Remove PDBs if group count is 0
55
- (Feature) Add SpecPropagated condition
66
- (Bugfix) Recover from locked ShuttingDown state
7+
- (Feature) Add tolerations runtime rotation
78

89
## [1.2.22](https://github.com/arangodb/kube-arangodb/tree/1.2.22) (2022-12-13)
910
- (Bugfix) Do not manage ports in managed ExternalAccess mode

docs/generated/actions.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
| RotateStopMember | no | 15m0s | Community & Enterprise | Finalize member rotation. After this action member is started back |
5858
| RuntimeContainerArgsLogLevelUpdate | no | 10m0s | Community & Enterprise | Change ArangoDB Member log levels in runtime |
5959
| RuntimeContainerImageUpdate | no | 10m0s | Community & Enterprise | Update Container Image in runtime |
60+
| RuntimeContainerSyncTolerations | no | 10m0s | Community & Enterprise | Update Pod Tolerations in runtime |
6061
| SetCondition | no | 10m0s | Community & Enterprise | (Deprecated) Set deployment condition |
6162
| SetConditionV2 | no | 10m0s | Community & Enterprise | Set deployment condition |
6263
| SetCurrentImage | no | 6h0m0s | Community & Enterprise | Update deployment current image after image discovery |
@@ -140,6 +141,7 @@ spec:
140141
RotateStopMember: 15m0s
141142
RuntimeContainerArgsLogLevelUpdate: 10m0s
142143
RuntimeContainerImageUpdate: 10m0s
144+
RuntimeContainerSyncTolerations: 10m0s
143145
SetCondition: 10m0s
144146
SetConditionV2: 10m0s
145147
SetCurrentImage: 6h0m0s

internal/actions.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@ actions:
202202
description: Update Cluster license (3.9+)
203203
RuntimeContainerImageUpdate:
204204
description: Update Container Image in runtime
205+
RuntimeContainerSyncTolerations:
206+
description: Update Pod Tolerations in runtime
205207
RuntimeContainerArgsLogLevelUpdate:
206208
description: Change ArangoDB Member log levels in runtime
207209
TopologyEnable:

pkg/apis/deployment/v1/actions.generated.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ const (
131131
ActionRuntimeContainerArgsLogLevelUpdateDefaultTimeout time.Duration = ActionsDefaultTimeout
132132
// ActionRuntimeContainerImageUpdateDefaultTimeout define default timeout for action ActionRuntimeContainerImageUpdate
133133
ActionRuntimeContainerImageUpdateDefaultTimeout time.Duration = ActionsDefaultTimeout
134+
// ActionRuntimeContainerSyncTolerationsDefaultTimeout define default timeout for action ActionRuntimeContainerSyncTolerations
135+
ActionRuntimeContainerSyncTolerationsDefaultTimeout time.Duration = ActionsDefaultTimeout
134136
// ActionSetConditionDefaultTimeout define default timeout for action ActionSetCondition
135137
ActionSetConditionDefaultTimeout time.Duration = ActionsDefaultTimeout
136138
// ActionSetConditionV2DefaultTimeout define default timeout for action ActionSetConditionV2
@@ -284,6 +286,8 @@ const (
284286
ActionTypeRuntimeContainerArgsLogLevelUpdate ActionType = "RuntimeContainerArgsLogLevelUpdate"
285287
// ActionTypeRuntimeContainerImageUpdate in scopes Normal. Update Container Image in runtime
286288
ActionTypeRuntimeContainerImageUpdate ActionType = "RuntimeContainerImageUpdate"
289+
// ActionTypeRuntimeContainerSyncTolerations in scopes Normal. Update Pod Tolerations in runtime
290+
ActionTypeRuntimeContainerSyncTolerations ActionType = "RuntimeContainerSyncTolerations"
287291
// ActionTypeSetCondition in scopes High. (Deprecated) Set deployment condition
288292
ActionTypeSetCondition ActionType = "SetCondition"
289293
// ActionTypeSetConditionV2 in scopes High. Set deployment condition
@@ -438,6 +442,8 @@ func (a ActionType) DefaultTimeout() time.Duration {
438442
return ActionRuntimeContainerArgsLogLevelUpdateDefaultTimeout
439443
case ActionTypeRuntimeContainerImageUpdate:
440444
return ActionRuntimeContainerImageUpdateDefaultTimeout
445+
case ActionTypeRuntimeContainerSyncTolerations:
446+
return ActionRuntimeContainerSyncTolerationsDefaultTimeout
441447
case ActionTypeSetCondition:
442448
return ActionSetConditionDefaultTimeout
443449
case ActionTypeSetConditionV2:
@@ -596,6 +602,8 @@ func (a ActionType) Priority() ActionPriority {
596602
return ActionPriorityNormal
597603
case ActionTypeRuntimeContainerImageUpdate:
598604
return ActionPriorityNormal
605+
case ActionTypeRuntimeContainerSyncTolerations:
606+
return ActionPriorityNormal
599607
case ActionTypeSetCondition:
600608
return ActionPriorityHigh
601609
case ActionTypeSetConditionV2:

pkg/deployment/images.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import (
4545
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
4646
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/interfaces"
4747
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/kerrors"
48+
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/tolerations"
4849
)
4950

5051
var _ interfaces.PodCreator = &ImageUpdatePod{}
@@ -319,27 +320,27 @@ func (i *ImageUpdatePod) GetFinalizers() []string {
319320
}
320321

321322
func (i *ImageUpdatePod) GetTolerations() []core.Toleration {
322-
shortDur := k8sutil.TolerationDuration{
323+
shortDur := tolerations.TolerationDuration{
323324
Forever: false,
324325
TimeSpan: time.Second * 5,
325326
}
326327

327-
tolerations := make([]core.Toleration, 0, 3+len(i.spec.ID.Get().Tolerations))
328+
ts := make([]core.Toleration, 0, 3+len(i.spec.ID.Get().Tolerations))
328329

329330
if idTolerations := i.spec.ID.Get().Tolerations; len(idTolerations) > 0 {
330331
for _, toleration := range idTolerations {
331-
tolerations = k8sutil.AddTolerationIfNotFound(tolerations, toleration)
332+
ts = tolerations.AddTolerationIfNotFound(ts, toleration)
332333
}
333334
}
334335

335-
tolerations = k8sutil.AddTolerationIfNotFound(tolerations,
336-
k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeNotReady, shortDur))
337-
tolerations = k8sutil.AddTolerationIfNotFound(tolerations,
338-
k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeUnreachable, shortDur))
339-
tolerations = k8sutil.AddTolerationIfNotFound(tolerations,
340-
k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeAlphaUnreachable, shortDur))
336+
ts = tolerations.AddTolerationIfNotFound(ts,
337+
tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeNotReady, shortDur))
338+
ts = tolerations.AddTolerationIfNotFound(ts,
339+
tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeUnreachable, shortDur))
340+
ts = tolerations.AddTolerationIfNotFound(ts,
341+
tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeAlphaUnreachable, shortDur))
341342

342-
return tolerations
343+
return ts
343344
}
344345

345346
func (i *ImageUpdatePod) IsDeploymentMode() bool {

pkg/deployment/images_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
"github.com/arangodb/kube-arangodb/pkg/util"
3939
"github.com/arangodb/kube-arangodb/pkg/util/constants"
4040
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
41+
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/tolerations"
4142
)
4243

4344
const (
@@ -482,14 +483,14 @@ func createTestCommandForImageUpdatePod() []string {
482483

483484
func getTestTolerations() []core.Toleration {
484485

485-
shortDur := k8sutil.TolerationDuration{
486+
shortDur := tolerations.TolerationDuration{
486487
Forever: false,
487488
TimeSpan: time.Second * 5,
488489
}
489490

490491
return []core.Toleration{
491-
k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeNotReady, shortDur),
492-
k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeUnreachable, shortDur),
493-
k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeAlphaUnreachable, shortDur),
492+
tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeNotReady, shortDur),
493+
tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeUnreachable, shortDur),
494+
tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeAlphaUnreachable, shortDur),
494495
}
495496
}

pkg/deployment/reconcile/action.register.generated.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ var (
186186
_ Action = &actionRuntimeContainerImageUpdate{}
187187
_ actionFactory = newRuntimeContainerImageUpdateAction
188188

189+
_ Action = &actionRuntimeContainerSyncTolerations{}
190+
_ actionFactory = newRuntimeContainerSyncTolerationsAction
191+
189192
_ Action = &actionSetCondition{}
190193
_ actionFactory = newSetConditionAction
191194

@@ -894,6 +897,18 @@ func init() {
894897
registerAction(action, function)
895898
}
896899

900+
// RuntimeContainerSyncTolerations
901+
{
902+
// Get Action definition
903+
function := newRuntimeContainerSyncTolerationsAction
904+
action := api.ActionTypeRuntimeContainerSyncTolerations
905+
906+
// Wrap action main function
907+
908+
// Register action
909+
registerAction(action, function)
910+
}
911+
897912
// SetCondition
898913
{
899914
// Get Action definition

pkg/deployment/reconcile/action.register.generated_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,13 @@ func Test_Actions(t *testing.T) {
403403
})
404404
})
405405

406+
t.Run("RuntimeContainerSyncTolerations", func(t *testing.T) {
407+
ActionsExistence(t, api.ActionTypeRuntimeContainerSyncTolerations)
408+
t.Run("Internal", func(t *testing.T) {
409+
require.False(t, api.ActionTypeRuntimeContainerSyncTolerations.Internal())
410+
})
411+
})
412+
406413
t.Run("SetCondition", func(t *testing.T) {
407414
ActionsExistence(t, api.ActionTypeSetCondition)
408415
t.Run("Internal", func(t *testing.T) {

pkg/deployment/reconcile/action_runtime_container_image_update.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ func (a actionRuntimeContainerImageUpdate) Post(ctx context.Context) error {
121121
return err
122122
}
123123

124-
return inspector.WithArangoMemberUpdate(ctx, cache, name, func(in *api.ArangoMember) (bool, error) {
124+
return inspector.WithArangoMemberStatusUpdate(ctx, cache, name, func(in *api.ArangoMember) (bool, error) {
125125
if in.Spec.Template == nil || in.Status.Template == nil ||
126126
in.Spec.Template.PodSpec == nil || in.Status.Template.PodSpec == nil {
127127
a.log.Info("Nil Member definition")
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
21+
package reconcile
22+
23+
import (
24+
"context"
25+
"reflect"
26+
27+
"github.com/pkg/errors"
28+
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
29+
"k8s.io/apimachinery/pkg/types"
30+
31+
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
32+
"github.com/arangodb/kube-arangodb/pkg/deployment/patch"
33+
"github.com/arangodb/kube-arangodb/pkg/util/globals"
34+
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/tolerations"
35+
)
36+
37+
func newRuntimeContainerSyncTolerationsAction(action api.Action, actionCtx ActionContext) Action {
38+
a := &actionRuntimeContainerSyncTolerations{}
39+
40+
a.actionImpl = newActionImplDefRef(action, actionCtx)
41+
42+
return a
43+
}
44+
45+
type actionRuntimeContainerSyncTolerations struct {
46+
// actionImpl implement timeout and member id functions
47+
actionImpl
48+
49+
actionEmptyCheckProgress
50+
}
51+
52+
// Start starts the action for changing conditions on the provided member.
53+
func (a actionRuntimeContainerSyncTolerations) Start(ctx context.Context) (bool, error) {
54+
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
55+
if !ok {
56+
a.log.Info("member is gone already")
57+
return true, nil
58+
}
59+
60+
cache, ok := a.actionCtx.ACS().ClusterCache(m.ClusterID)
61+
if !ok {
62+
return true, errors.Errorf("Client is not ready")
63+
}
64+
65+
memberName := m.ArangoMemberName(a.actionCtx.GetName(), a.action.Group)
66+
member, ok := cache.ArangoMember().V1().GetSimple(memberName)
67+
if !ok {
68+
return false, errors.Errorf("ArangoMember %s not found", memberName)
69+
}
70+
71+
pod, ok := cache.Pod().V1().GetSimple(m.Pod.GetName())
72+
if !ok {
73+
a.log.Str("podName", m.Pod.GetName()).Info("pod is not present")
74+
return true, nil
75+
}
76+
77+
currentTolerations := pod.Spec.Tolerations
78+
79+
expectedTolerations := member.Spec.Template.PodSpec.Spec.Tolerations
80+
81+
calculatedTolerations := tolerations.MergeTolerationsIfNotFound(currentTolerations, expectedTolerations)
82+
83+
if reflect.DeepEqual(currentTolerations, calculatedTolerations) {
84+
return true, nil
85+
}
86+
87+
p, err := patch.NewPatch(patch.ItemReplace(patch.NewPath("spec", "tolerations"), calculatedTolerations)).Marshal()
88+
if err != nil {
89+
return false, errors.Wrapf(err, "Unable to create patch")
90+
}
91+
92+
nctx, c := globals.GetGlobalTimeouts().Kubernetes().WithTimeout(ctx)
93+
defer c()
94+
95+
if _, err := a.actionCtx.ACS().CurrentClusterCache().PodsModInterface().V1().Patch(nctx, pod.GetName(), types.JSONPatchType, p, meta.PatchOptions{}); err != nil {
96+
return false, errors.Wrapf(err, "Unable to apply patch")
97+
}
98+
99+
return true, nil
100+
}

0 commit comments

Comments
 (0)