|
| 1 | +// |
| 2 | +// DISCLAIMER |
| 3 | +// |
| 4 | +// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany |
| 5 | +// |
| 6 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | +// you may not use this file except in compliance with the License. |
| 8 | +// You may obtain a copy of the License at |
| 9 | +// |
| 10 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +// |
| 12 | +// Unless required by applicable law or agreed to in writing, software |
| 13 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | +// See the License for the specific language governing permissions and |
| 16 | +// limitations under the License. |
| 17 | +// |
| 18 | +// Copyright holder is ArangoDB GmbH, Cologne, Germany |
| 19 | +// |
| 20 | + |
| 21 | +package reconcile |
| 22 | + |
| 23 | +import ( |
| 24 | + "context" |
| 25 | + |
| 26 | + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" |
| 27 | + "github.com/arangodb/kube-arangodb/pkg/deployment/actions" |
| 28 | + "github.com/arangodb/kube-arangodb/pkg/deployment/agency" |
| 29 | + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" |
| 30 | +) |
| 31 | + |
| 32 | +// createMemberFailedRestoreNormalPlan returns only actions which are not recreate member. |
| 33 | +func (r *Reconciler) createMemberFailedRestoreNormalPlan(ctx context.Context, apiObject k8sutil.APIObject, |
| 34 | + spec api.DeploymentSpec, status api.DeploymentStatus, context PlanBuilderContext) api.Plan { |
| 35 | + condition := func(a api.Action) bool { |
| 36 | + return a.Type != api.ActionTypeRecreateMember |
| 37 | + } |
| 38 | + |
| 39 | + return r.createMemberFailedRestoreInternal(ctx, apiObject, spec, status, context).Filter(condition) |
| 40 | +} |
| 41 | + |
| 42 | +// createMemberFailedRestoreHighPlan returns only recreate member actions. |
| 43 | +func (r *Reconciler) createMemberFailedRestoreHighPlan(ctx context.Context, apiObject k8sutil.APIObject, |
| 44 | + spec api.DeploymentSpec, status api.DeploymentStatus, context PlanBuilderContext) api.Plan { |
| 45 | + condition := func(a api.Action) bool { |
| 46 | + return a.Type == api.ActionTypeRecreateMember |
| 47 | + } |
| 48 | + |
| 49 | + return r.createMemberFailedRestoreInternal(ctx, apiObject, spec, status, context).Filter(condition) |
| 50 | +} |
| 51 | + |
| 52 | +func (r *Reconciler) createMemberFailedRestoreInternal(_ context.Context, _ k8sutil.APIObject, spec api.DeploymentSpec, |
| 53 | + status api.DeploymentStatus, context PlanBuilderContext) api.Plan { |
| 54 | + var plan api.Plan |
| 55 | + |
| 56 | + // Fetch agency plan. |
| 57 | + agencyState, agencyOK := context.GetAgencyCache() |
| 58 | + |
| 59 | + // Check for members in failed state. |
| 60 | + status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { |
| 61 | + failed := 0 |
| 62 | + for _, m := range members { |
| 63 | + if m.Phase == api.MemberPhaseFailed { |
| 64 | + failed++ |
| 65 | + } |
| 66 | + } |
| 67 | + for _, m := range members { |
| 68 | + if m.Phase != api.MemberPhaseFailed || len(plan) > 0 { |
| 69 | + continue |
| 70 | + } |
| 71 | + |
| 72 | + memberLog := r.log.Str("id", m.ID).Str("role", group.AsRole()) |
| 73 | + |
| 74 | + if group == api.ServerGroupDBServers && spec.GetMode() == api.DeploymentModeCluster { |
| 75 | + if !agencyOK { |
| 76 | + // If agency is down DBServers should not be touched. |
| 77 | + memberLog.Info("Agency state is not present") |
| 78 | + continue |
| 79 | + } |
| 80 | + |
| 81 | + if c := spec.DBServers.GetCount(); c <= len(members)-failed { |
| 82 | + // There are more or equal alive members than current count. A member should not be recreated. |
| 83 | + continue |
| 84 | + } |
| 85 | + |
| 86 | + if agencyState.Plan.Collections.IsDBServerPresent(agency.Server(m.ID)) { |
| 87 | + // DBServer still exists in agency plan! Will not be removed, but needs to be recreated. |
| 88 | + memberLog.Info("Recreating DBServer - it cannot be removed gracefully") |
| 89 | + plan = append(plan, actions.NewAction(api.ActionTypeRecreateMember, group, m)) |
| 90 | + |
| 91 | + continue |
| 92 | + } |
| 93 | + // From here on, DBServer can be recreated. |
| 94 | + } |
| 95 | + |
| 96 | + switch group { |
| 97 | + case api.ServerGroupAgents: |
| 98 | + // For agents just recreate member do not rotate ID, do not remove PVC or service. |
| 99 | + memberLog.Info("Restoring old member. For agency members recreation of PVC is not supported - to prevent DataLoss") |
| 100 | + plan = append(plan, actions.NewAction(api.ActionTypeRecreateMember, group, m)) |
| 101 | + case api.ServerGroupSingle: |
| 102 | + // Do not remove data for single. |
| 103 | + memberLog.Info("Restoring old member. Rotation for single servers is not safe") |
| 104 | + plan = append(plan, actions.NewAction(api.ActionTypeRecreateMember, group, m)) |
| 105 | + default: |
| 106 | + if spec.GetAllowMemberRecreation(group) { |
| 107 | + memberLog.Info("Creating member replacement plan because member has failed") |
| 108 | + plan = append(plan, |
| 109 | + actions.NewAction(api.ActionTypeRemoveMember, group, m), |
| 110 | + actions.NewAction(api.ActionTypeAddMember, group, withPredefinedMember("")), |
| 111 | + actions.NewAction(api.ActionTypeWaitForMemberUp, group, withPredefinedMember(api.MemberIDPreviousAction)), |
| 112 | + ) |
| 113 | + } else { |
| 114 | + memberLog.Info("Restoring old member. Recreation is disabled for group") |
| 115 | + plan = append(plan, actions.NewAction(api.ActionTypeRecreateMember, group, m)) |
| 116 | + } |
| 117 | + } |
| 118 | + } |
| 119 | + return nil |
| 120 | + }) |
| 121 | + |
| 122 | + if len(plan) == 0 && !agencyOK { |
| 123 | + r.log.Warn("unable to build further plan without access to agency") |
| 124 | + plan = append(plan, actions.NewClusterAction(api.ActionTypeIdle)) |
| 125 | + } |
| 126 | + |
| 127 | + return plan |
| 128 | +} |
0 commit comments