Skip to content

Commit 463ab90

Browse files
authored
[Bugfix] Wait for Pod to be Ready in post-restart actions (#1206)
1 parent 9f4d397 commit 463ab90

File tree

12 files changed

+130
-20
lines changed

12 files changed

+130
-20
lines changed

CHANGELOG.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@
3636
- (Feature) Switch services to Port names
3737
- (Feature) Configurable ArangoD Port
3838
- (Feature) Allow to exclude metrics
39-
- (Feature) Do not restart member if all pods in group are not ready
40-
- (Bugfix) Do not stop Sync if Synchronization is in progress
39+
- (Bugfix) Do not stop Sync if Synchronization is in progress
40+
- (Bugfix) Wait for Pod to be Ready in post-restart actions
4141

4242
## [1.2.20](https://github.com/arangodb/kube-arangodb/tree/1.2.20) (2022-10-25)
4343
- (Feature) Add action progress

docs/generated/actions.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
| UpdateTLSSNI | no | 10m0s | Enterprise Only | Update certificate in SNI |
7878
| UpgradeMember | no | 6h0m0s | Community & Enterprise | Run the Upgrade procedure on member |
7979
| WaitForMemberInSync | no | 30m0s | Community & Enterprise | Wait for member to be in sync. In case of DBServer waits for shards. In case of Agents to catch-up on Agency index |
80+
| WaitForMemberReady | no | 30m0s | Community & Enterprise | Wait for member Ready condition |
8081
| WaitForMemberUp | no | 30m0s | Community & Enterprise | Wait for member to be responsive |
8182

8283

@@ -159,6 +160,7 @@ spec:
159160
UpdateTLSSNI: 10m0s
160161
UpgradeMember: 6h0m0s
161162
WaitForMemberInSync: 30m0s
163+
WaitForMemberReady: 30m0s
162164
WaitForMemberUp: 30m0s
163165

164166
```

internal/actions.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ actions:
4444
UpgradeMember:
4545
description: Run the Upgrade procedure on member
4646
timeout: 6h
47+
WaitForMemberReady:
48+
description: Wait for member Ready condition
49+
timeout: 30m
4750
WaitForMemberUp:
4851
description: Wait for member to be responsive
4952
timeout: 30m

pkg/apis/deployment/v1/actions.generated.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ const (
171171
ActionUpgradeMemberDefaultTimeout time.Duration = 21600 * time.Second // 6h0m0s
172172
// ActionWaitForMemberInSyncDefaultTimeout define default timeout for action ActionWaitForMemberInSync
173173
ActionWaitForMemberInSyncDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
174+
// ActionWaitForMemberReadyDefaultTimeout define default timeout for action ActionWaitForMemberReady
175+
ActionWaitForMemberReadyDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
174176
// ActionWaitForMemberUpDefaultTimeout define default timeout for action ActionWaitForMemberUp
175177
ActionWaitForMemberUpDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
176178

@@ -322,6 +324,8 @@ const (
322324
ActionTypeUpgradeMember ActionType = "UpgradeMember"
323325
// ActionTypeWaitForMemberInSync in scopes Normal. Wait for member to be in sync. In case of DBServer waits for shards. In case of Agents to catch-up on Agency index
324326
ActionTypeWaitForMemberInSync ActionType = "WaitForMemberInSync"
327+
// ActionTypeWaitForMemberReady in scopes Normal. Wait for member Ready condition
328+
ActionTypeWaitForMemberReady ActionType = "WaitForMemberReady"
325329
// ActionTypeWaitForMemberUp in scopes Normal. Wait for member to be responsive
326330
ActionTypeWaitForMemberUp ActionType = "WaitForMemberUp"
327331
)
@@ -474,6 +478,8 @@ func (a ActionType) DefaultTimeout() time.Duration {
474478
return ActionUpgradeMemberDefaultTimeout
475479
case ActionTypeWaitForMemberInSync:
476480
return ActionWaitForMemberInSyncDefaultTimeout
481+
case ActionTypeWaitForMemberReady:
482+
return ActionWaitForMemberReadyDefaultTimeout
477483
case ActionTypeWaitForMemberUp:
478484
return ActionWaitForMemberUpDefaultTimeout
479485
default:
@@ -630,6 +636,8 @@ func (a ActionType) Priority() ActionPriority {
630636
return ActionPriorityNormal
631637
case ActionTypeWaitForMemberInSync:
632638
return ActionPriorityNormal
639+
case ActionTypeWaitForMemberReady:
640+
return ActionPriorityNormal
633641
case ActionTypeWaitForMemberUp:
634642
return ActionPriorityNormal
635643
default:

pkg/apis/deployment/v2alpha1/actions.generated.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ const (
171171
ActionUpgradeMemberDefaultTimeout time.Duration = 21600 * time.Second // 6h0m0s
172172
// ActionWaitForMemberInSyncDefaultTimeout define default timeout for action ActionWaitForMemberInSync
173173
ActionWaitForMemberInSyncDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
174+
// ActionWaitForMemberReadyDefaultTimeout define default timeout for action ActionWaitForMemberReady
175+
ActionWaitForMemberReadyDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
174176
// ActionWaitForMemberUpDefaultTimeout define default timeout for action ActionWaitForMemberUp
175177
ActionWaitForMemberUpDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
176178

@@ -322,6 +324,8 @@ const (
322324
ActionTypeUpgradeMember ActionType = "UpgradeMember"
323325
// ActionTypeWaitForMemberInSync in scopes Normal. Wait for member to be in sync. In case of DBServer waits for shards. In case of Agents to catch-up on Agency index
324326
ActionTypeWaitForMemberInSync ActionType = "WaitForMemberInSync"
327+
// ActionTypeWaitForMemberReady in scopes Normal. Wait for member Ready condition
328+
ActionTypeWaitForMemberReady ActionType = "WaitForMemberReady"
325329
// ActionTypeWaitForMemberUp in scopes Normal. Wait for member to be responsive
326330
ActionTypeWaitForMemberUp ActionType = "WaitForMemberUp"
327331
)
@@ -474,6 +478,8 @@ func (a ActionType) DefaultTimeout() time.Duration {
474478
return ActionUpgradeMemberDefaultTimeout
475479
case ActionTypeWaitForMemberInSync:
476480
return ActionWaitForMemberInSyncDefaultTimeout
481+
case ActionTypeWaitForMemberReady:
482+
return ActionWaitForMemberReadyDefaultTimeout
477483
case ActionTypeWaitForMemberUp:
478484
return ActionWaitForMemberUpDefaultTimeout
479485
default:
@@ -630,6 +636,8 @@ func (a ActionType) Priority() ActionPriority {
630636
return ActionPriorityNormal
631637
case ActionTypeWaitForMemberInSync:
632638
return ActionPriorityNormal
639+
case ActionTypeWaitForMemberReady:
640+
return ActionPriorityNormal
633641
case ActionTypeWaitForMemberUp:
634642
return ActionPriorityNormal
635643
default:

pkg/deployment/reconcile/action.register.generated.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,9 @@ var (
246246
_ Action = &actionWaitForMemberInSync{}
247247
_ actionFactory = newWaitForMemberInSyncAction
248248

249+
_ Action = &actionWaitForMemberReady{}
250+
_ actionFactory = newWaitForMemberReadyAction
251+
249252
_ Action = &actionWaitForMemberUp{}
250253
_ actionFactory = newWaitForMemberUpAction
251254
)
@@ -1132,6 +1135,18 @@ func init() {
11321135
registerAction(action, function)
11331136
}
11341137

1138+
// WaitForMemberReady
1139+
{
1140+
// Get Action definition
1141+
function := newWaitForMemberReadyAction
1142+
action := api.ActionTypeWaitForMemberReady
1143+
1144+
// Wrap action main function
1145+
1146+
// Register action
1147+
registerAction(action, function)
1148+
}
1149+
11351150
// WaitForMemberUp
11361151
{
11371152
// Get Action definition

pkg/deployment/reconcile/action.register.generated_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,13 @@ func Test_Actions(t *testing.T) {
544544
})
545545
})
546546

547+
t.Run("WaitForMemberReady", func(t *testing.T) {
548+
ActionsExistence(t, api.ActionTypeWaitForMemberReady)
549+
t.Run("Internal", func(t *testing.T) {
550+
require.False(t, api.ActionTypeWaitForMemberReady.Internal())
551+
})
552+
})
553+
547554
t.Run("WaitForMemberUp", func(t *testing.T) {
548555
ActionsExistence(t, api.ActionTypeWaitForMemberUp)
549556
t.Run("Internal", func(t *testing.T) {

pkg/deployment/reconcile/action_add_member.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"context"
2525

2626
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
27-
"github.com/arangodb/kube-arangodb/pkg/deployment/actions"
2827
"github.com/arangodb/kube-arangodb/pkg/deployment/topology"
2928
"github.com/arangodb/kube-arangodb/pkg/util/errors"
3029
)
@@ -68,9 +67,5 @@ func (a *actionAddMember) Start(ctx context.Context) (bool, error) {
6867

6968
// ActionPlanAppender appends wait methods to the plan
7069
func (a *actionAddMember) ActionPlanAppender(current api.Plan) (api.Plan, bool) {
71-
np := api.Plan{
72-
actions.NewAction(api.ActionTypeWaitForMemberUp, a.action.Group, withPredefinedMember(a.newMemberID), "Wait for member in sync after creation"),
73-
actions.NewAction(api.ActionTypeWaitForMemberInSync, a.action.Group, withPredefinedMember(a.newMemberID), "Wait for member in sync after creation"),
74-
}
75-
return append(current, np...), true
70+
return withWaitForMember(current, a.action.Group, withPredefinedMember(a.action.MemberID)), true
7671
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2022 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
21+
package reconcile
22+
23+
import (
24+
"context"
25+
26+
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
27+
"github.com/arangodb/kube-arangodb/pkg/util/errors"
28+
)
29+
30+
// newWaitForMemberReadyAction creates a new Action that implements the given
31+
// planned WaitForMemberReady action.
32+
func newWaitForMemberReadyAction(action api.Action, actionCtx ActionContext) Action {
33+
a := &actionWaitForMemberReady{}
34+
35+
a.actionImpl = newActionImplDefRef(action, actionCtx)
36+
37+
return a
38+
}
39+
40+
// actionWaitForMemberReady implements an WaitForMemberReady.
41+
type actionWaitForMemberReady struct {
42+
// actionImpl implement timeout and member id functions
43+
actionImpl
44+
}
45+
46+
// Start performs the start of the action.
47+
// Returns true if the action is completely finished, false in case
48+
// the start time needs to be recorded and a ready condition needs to be checked.
49+
func (a *actionWaitForMemberReady) Start(ctx context.Context) (bool, error) {
50+
ready, _, err := a.CheckProgress(ctx)
51+
if err != nil {
52+
return false, errors.WithStack(err)
53+
}
54+
return ready, nil
55+
}
56+
57+
// CheckProgress checks the progress of the action.
58+
// Returns true if the action is completely finished, false otherwise.
59+
func (a *actionWaitForMemberReady) CheckProgress(ctx context.Context) (bool, bool, error) {
60+
member, ok := a.actionCtx.GetMemberStatusByID(a.MemberID())
61+
if !ok || member.Phase == api.MemberPhaseFailed {
62+
a.log.Debug("Member in failed phase")
63+
return true, false, nil
64+
}
65+
66+
return member.Conditions.IsTrue(api.ConditionTypeReady), false, nil
67+
}

pkg/deployment/reconcile/plan_builder_rotate_upgrade.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,7 @@ func (r *Reconciler) createUpdatePlanInternal(apiObject k8sutil.APIObject, spec
199199
AddParam(api.ConditionTypePendingUpdate.String(), "").
200200
AddParam(api.ConditionTypeUpdating.String(), "T")}, false
201201
} else {
202-
p = p.After(
203-
actions.NewAction(api.ActionTypeWaitForMemberUp, m.Group, m.Member),
204-
actions.NewAction(api.ActionTypeWaitForMemberInSync, m.Group, m.Member))
202+
p = withWaitForMember(p, m.Group, m.Member)
205203

206204
p = p.Wrap(actions.NewAction(api.ActionTypeSetMemberCondition, m.Group, m.Member, reason).
207205
AddParam(api.ConditionTypePendingUpdate.String(), "").AddParam(api.ConditionTypeUpdating.String(), "T"),
@@ -458,9 +456,6 @@ func groupReadyForRestart(context PlanBuilderContext, status api.DeploymentStatu
458456
return false, "Not all members are serving"
459457
}
460458

461-
if !status.Members.MembersOfGroup(group).AllMembersReady() {
462-
return false, "Not all members are ready"
463-
}
464459
switch group {
465460
case api.ServerGroupDBServers:
466461
agencyState, ok := context.GetAgencyCache()
@@ -535,3 +530,15 @@ func skipResignLeadership(mode api.DeploymentMode, v driver.Version) bool {
535530
return mode == api.DeploymentModeCluster && features.Maintenance().Enabled() && ((v.CompareTo("3.6.0") >= 0 && v.CompareTo("3.6.14") <= 0) ||
536531
(v.CompareTo("3.7.0") >= 0 && v.CompareTo("3.7.12") <= 0))
537532
}
533+
534+
func withWaitForMember(plan api.Plan, group api.ServerGroup, member api.MemberStatus) api.Plan {
535+
return append(plan, waitForMemberActions(group, member)...)
536+
}
537+
538+
func waitForMemberActions(group api.ServerGroup, member api.MemberStatus) api.Plan {
539+
return api.Plan{
540+
actions.NewAction(api.ActionTypeWaitForMemberUp, group, member, "Wait for member to be up after creation"),
541+
actions.NewAction(api.ActionTypeWaitForMemberReady, group, member, "Wait for member pod to be ready after creation"),
542+
actions.NewAction(api.ActionTypeWaitForMemberInSync, group, member, "Wait for member to be in sync after creation"),
543+
}
544+
}

0 commit comments

Comments
 (0)