Skip to content

Commit fc8063c

Browse files
author
lamai93
committed
Reduce the frequency of health calls to once every five seconds.
1 parent fd1a4e4 commit fc8063c

File tree

7 files changed

+61
-27
lines changed

7 files changed

+61
-27
lines changed

pkg/deployment/context_impl.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,11 @@ func (d *Deployment) GetSpec() api.DeploymentSpec {
8484
return d.apiObject.Spec
8585
}
8686

87+
// GetDeploymentHealth returns a copy of the latest known state of cluster health.
// It delegates to d.resources and passes that call's result and error through
// unchanged.
88+
func (d *Deployment) GetDeploymentHealth() (driver.ClusterHealth, error) {
89+
return d.resources.GetDeploymentHealth()
90+
}
91+
8792
// GetStatus returns the current status of the deployment
8893
// together with the current version of that status.
8994
func (d *Deployment) GetStatus() (api.DeploymentStatus, int32) {

pkg/deployment/reconcile/action_context.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ type ActionContext interface {
8484
// SetCurrentImage changes the CurrentImage field in the deployment
8585
// status to the given image.
8686
SetCurrentImage(imageInfo api.ImageInfo) error
87+
// GetDeploymentHealth returns a copy of the latest known state of cluster health
88+
GetDeploymentHealth() (driver.ClusterHealth, error)
8789
}
8890

8991
// newActionContext creates a new ActionContext implementation.
@@ -105,6 +107,11 @@ func (ac *actionContext) GetMode() api.DeploymentMode {
105107
return ac.context.GetSpec().GetMode()
106108
}
107109

110+
// GetDeploymentHealth returns a copy of the latest known state of cluster health.
// It forwards the call to the wrapped context (ac.context) and passes that
// call's result and error through unchanged.
111+
func (ac *actionContext) GetDeploymentHealth() (driver.ClusterHealth, error) {
112+
return ac.context.GetDeploymentHealth()
113+
}
114+
108115
// GetDatabaseClient returns a cached client for the entire database (cluster coordinators or single server),
109116
// creating one if needed.
110117
func (ac *actionContext) GetDatabaseClient(ctx context.Context) (driver.Client, error) {

pkg/deployment/reconcile/action_remove_member.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,9 @@ func (a *actionRemoveMember) Start(ctx context.Context) (bool, error) {
7171
if !driver.IsNotFound(err) && !driver.IsPreconditionFailed(err) {
7272
return false, maskAny(errors.Wrapf(err, "Failed to remove server from cluster: %#v", err))
7373
} else if driver.IsPreconditionFailed(err) {
74-
cluster, err := client.Cluster(ctx)
74+
health, err := a.actionCtx.GetDeploymentHealth()
7575
if err != nil {
76-
return false, maskAny(errors.Wrapf(err, "Failed to obtain cluster: %#v", err))
77-
}
78-
health, err := cluster.Health(ctx)
79-
if err != nil {
80-
return false, maskAny(errors.Wrapf(err, "Failed to obtain cluster health: %#v", err))
76+
return false, maskAny(errors.Wrapf(err, "failed to get cluster health"))
8177
}
8278
// We don't care if not found
8379
if record, ok := health.Health[driver.ServerID(m.ID)]; ok {

pkg/deployment/reconcile/action_wait_for_member_up.go

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828

2929
driver "github.com/arangodb/go-driver"
3030
"github.com/arangodb/go-driver/agency"
31+
"github.com/pkg/errors"
3132
"github.com/rs/zerolog"
3233

3334
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
@@ -143,20 +144,9 @@ func (a *actionWaitForMemberUp) checkProgressAgent(ctx context.Context) (bool, b
143144
// of a cluster deployment (coordinator/dbserver).
144145
func (a *actionWaitForMemberUp) checkProgressCluster(ctx context.Context) (bool, bool, error) {
145146
log := a.log
146-
c, err := a.actionCtx.GetDatabaseClient(ctx)
147+
h, err := a.actionCtx.GetDeploymentHealth()
147148
if err != nil {
148-
log.Debug().Err(err).Msg("Failed to create database client")
149-
return false, false, maskAny(err)
150-
}
151-
cluster, err := c.Cluster(ctx)
152-
if err != nil {
153-
log.Debug().Err(err).Msg("Failed to access cluster")
154-
return false, false, maskAny(err)
155-
}
156-
h, err := cluster.Health(ctx)
157-
if err != nil {
158-
log.Debug().Err(err).Msg("Failed to get cluster health")
159-
return false, false, maskAny(err)
149+
return false, false, maskAny(errors.Wrapf(err, "failed to get cluster health"))
160150
}
161151
sh, found := h.Health[driver.ServerID(a.action.MemberID)]
162152
if !found {
@@ -168,12 +158,24 @@ func (a *actionWaitForMemberUp) checkProgressCluster(ctx context.Context) (bool,
168158
return false, false, nil
169159
}
170160
if a.action.Group == api.ServerGroupDBServers {
171-
dbs, err := c.Databases(ctx)
161+
inventoryCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
162+
defer cancel()
163+
c, err := a.actionCtx.GetDatabaseClient(ctx)
164+
if err != nil {
165+
log.Debug().Err(err).Msg("Failed to create database client")
166+
return false, false, maskAny(err)
167+
}
168+
cluster, err := c.Cluster(ctx)
169+
if err != nil {
170+
log.Debug().Err(err).Msg("Failed to access cluster")
171+
return false, false, maskAny(err)
172+
}
173+
dbs, err := c.Databases(inventoryCtx)
172174
if err != nil {
173175
return false, false, err
174176
}
175177
for _, db := range dbs {
176-
inv, err := cluster.DatabaseInventory(ctx, db)
178+
inv, err := cluster.DatabaseInventory(inventoryCtx, db)
177179
if err != nil {
178180
return false, false, err
179181
}

pkg/deployment/reconcile/context.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,4 +93,6 @@ type Context interface {
9393
// GetExpectedPodArguments creates command line arguments for a server in the given group with given ID.
9494
GetExpectedPodArguments(apiObject metav1.Object, deplSpec api.DeploymentSpec, group api.ServerGroup,
9595
agents api.MemberStatusList, id string, version driver.Version) []string
96+
// GetDeploymentHealth returns a copy of the latest known state of cluster health
97+
GetDeploymentHealth() (driver.ClusterHealth, error)
9698
}

pkg/deployment/resources/deployment_health.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@ package resources
2424

2525
import (
2626
"context"
27+
"fmt"
2728
"time"
2829

30+
driver "github.com/arangodb/go-driver"
2931
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
3032
"github.com/arangodb/kube-arangodb/pkg/metrics"
3133
)
@@ -88,3 +90,21 @@ func (r *Resources) fetchDeploymentHealth() error {
8890
r.health.timestamp = time.Now()
8991
return nil
9092
}
93+
94+
// GetDeploymentHealth returns a copy of the latest known state of cluster health.
//
// The cached health is read while holding r.health.mutex. If no health has been
// recorded yet (r.health.timestamp is still the zero time), an empty
// ClusterHealth and an error are returned. The returned value carries its own
// Health map, so adding or removing entries on it does not affect the cache.
95+
func (r *Resources) GetDeploymentHealth() (driver.ClusterHealth, error) {
96+
97+
r.health.mutex.Lock()
98+
defer r.health.mutex.Unlock()
99+
if r.health.timestamp.IsZero() {
100+
return driver.ClusterHealth{}, fmt.Errorf("No cluster health available")
101+
}
102+
103+
// Copy the struct first; its Health map still aliases the cache at this point.
newhealth := r.health.clusterHealth
104+
// Replace the aliased map with a fresh one, pre-sized to the known entry count.
newhealth.Health = make(map[driver.ServerID]driver.ServerHealth, len(r.health.clusterHealth.Health))
105+
106+
for k, v := range r.health.clusterHealth.Health {
107+
newhealth.Health[k] = v
108+
}
109+
return newhealth, nil
110+
}

pkg/deployment/resources/pod_termination.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,12 @@ func (r *Resources) prepareAgencyPodTermination(ctx context.Context, log zerolog
5858
agentDataWillBeGone := false
5959
if p.Spec.NodeName != "" {
6060
node, err := r.context.GetKubeCli().CoreV1().Nodes().Get(p.Spec.NodeName, metav1.GetOptions{})
61-
if err != nil {
61+
if k8sutil.IsNotFound(err) {
62+
log.Warn().Msg("Node not found")
63+
} else if err != nil {
6264
log.Warn().Err(err).Msg("Failed to get node for member")
6365
return maskAny(err)
64-
}
65-
if node.Spec.Unschedulable {
66+
} else if node.Spec.Unschedulable {
6667
agentDataWillBeGone = true
6768
}
6869
}
@@ -140,11 +141,12 @@ func (r *Resources) prepareDBServerPodTermination(ctx context.Context, log zerol
140141
dbserverDataWillBeGone := false
141142
if p.Spec.NodeName != "" {
142143
node, err := r.context.GetKubeCli().CoreV1().Nodes().Get(p.Spec.NodeName, metav1.GetOptions{})
143-
if err != nil {
144+
if k8sutil.IsNotFound(err) {
145+
log.Warn().Msg("Node not found")
146+
} else if err != nil {
144147
log.Warn().Err(err).Msg("Failed to get node for member")
145148
return maskAny(err)
146-
}
147-
if node.Spec.Unschedulable {
149+
} else if node.Spec.Unschedulable {
148150
dbserverDataWillBeGone = true
149151
}
150152
}

0 commit comments

Comments
 (0)