Skip to content

Commit f346963

Browse files
authored
[Feature] Add Startup Probe support (#836)
1 parent 281f28f commit f346963

File tree

11 files changed

+228
-30
lines changed

11 files changed

+228
-30
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- Add Graceful shutdown as finalizer (supports kubectl delete)
88
- Add Watch to Lifecycle command
99
- Add Topology Discovery
10+
- Add Support for StartupProbe
1011

1112
## [1.2.4](https://github.com/arangodb/kube-arangodb/tree/1.2.4) (2021-10-22)
1213
- Replace `beta.kubernetes.io/arch` Pod label with `kubernetes.io/arch` using Silent Rotation

pkg/apis/deployment/v1/server_group_spec.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,11 @@ type ServerGroupProbesSpec struct {
261261
ReadinessProbeDisabled *bool `json:"readinessProbeDisabled,omitempty"`
262262
// ReadinessProbeSpec override readiness probe configuration
263263
ReadinessProbeSpec *ServerGroupProbeSpec `json:"readinessProbeSpec,omitempty"`
264+
265+
// StartupProbeDisabled if true startupProbes are disabled
266+
StartupProbeDisabled *bool `json:"startupProbeDisabled,omitempty"`
267+
// StartupProbeSpec override startup probe configuration
268+
StartupProbeSpec *ServerGroupProbeSpec `json:"startupProbeSpec,omitempty"`
264269
}
265270

266271
// GetReadinessProbeDisabled returns in proper manner readiness probe flag with backward compatibility.

pkg/deployment/images.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,8 @@ func (a *ContainerIdentity) GetPorts() []core.ContainerPort {
405405
}
406406
}
407407

408-
func (a *ContainerIdentity) GetProbes() (*core.Probe, *core.Probe, error) {
409-
return nil, nil, nil
408+
func (a *ContainerIdentity) GetProbes() (*core.Probe, *core.Probe, *core.Probe, error) {
409+
return nil, nil, nil, nil
410410
}
411411

412412
func (a *ContainerIdentity) GetResourceRequirements() core.ResourceRequirements {

pkg/deployment/pod/probes.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,37 +39,47 @@ func LivenessSpec(group api.ServerGroup) Probe {
3939
return probeMap[group].liveness
4040
}
4141

42+
func StartupSpec(group api.ServerGroup) Probe {
43+
return probeMap[group].startup
44+
}
45+
4246
type Probe struct {
4347
CanBeEnabled, EnabledByDefault bool
4448
}
4549

4650
type probes struct {
47-
liveness, readiness Probe
51+
liveness, readiness, startup Probe
4852
}
4953

5054
// probeMap defines default values and if Probe can be enabled
5155
var probeMap = map[api.ServerGroup]probes{
5256
api.ServerGroupSingle: {
57+
startup: newProbe(true, false),
5358
liveness: newProbe(true, true),
5459
readiness: newProbe(true, true),
5560
},
5661
api.ServerGroupAgents: {
62+
startup: newProbe(true, false),
5763
liveness: newProbe(true, true),
5864
readiness: newProbe(true, false),
5965
},
6066
api.ServerGroupDBServers: {
67+
startup: newProbe(true, false),
6168
liveness: newProbe(true, true),
6269
readiness: newProbe(true, false),
6370
},
6471
api.ServerGroupCoordinators: {
72+
startup: newProbe(true, false),
6573
liveness: newProbe(true, false),
6674
readiness: newProbe(true, true),
6775
},
6876
api.ServerGroupSyncMasters: {
77+
startup: newProbe(true, false),
6978
liveness: newProbe(true, true),
7079
readiness: newProbe(false, false),
7180
},
7281
api.ServerGroupSyncWorkers: {
82+
startup: newProbe(true, false),
7383
liveness: newProbe(true, true),
7484
readiness: newProbe(false, false),
7585
},

pkg/deployment/resources/pod_creator_arangod.go

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -136,17 +136,22 @@ func (a *ArangoDContainer) GetSecurityContext() *core.SecurityContext {
136136
return a.groupSpec.SecurityContext.NewSecurityContext()
137137
}
138138

139-
func (a *ArangoDContainer) GetProbes() (*core.Probe, *core.Probe, error) {
140-
var liveness, readiness *core.Probe
139+
func (a *ArangoDContainer) GetProbes() (*core.Probe, *core.Probe, *core.Probe, error) {
140+
var liveness, readiness, startup *core.Probe
141141

142142
probeLivenessConfig, err := a.resources.getLivenessProbe(a.spec, a.group, a.imageInfo.ArangoDBVersion)
143143
if err != nil {
144-
return nil, nil, err
144+
return nil, nil, nil, err
145145
}
146146

147147
probeReadinessConfig, err := a.resources.getReadinessProbe(a.spec, a.group, a.imageInfo.ArangoDBVersion)
148148
if err != nil {
149-
return nil, nil, err
149+
return nil, nil, nil, err
150+
}
151+
152+
probeStartupConfig, err := a.resources.getStartupProbe(a.spec, a.group, a.imageInfo.ArangoDBVersion)
153+
if err != nil {
154+
return nil, nil, nil, err
150155
}
151156

152157
if probeLivenessConfig != nil {
@@ -157,7 +162,11 @@ func (a *ArangoDContainer) GetProbes() (*core.Probe, *core.Probe, error) {
157162
readiness = probeReadinessConfig.Create()
158163
}
159164

160-
return liveness, readiness, nil
165+
if probeStartupConfig != nil {
166+
startup = probeStartupConfig.Create()
167+
}
168+
169+
return liveness, readiness, startup, nil
161170
}
162171

163172
func (a *ArangoDContainer) GetImage() string {
@@ -602,8 +611,8 @@ func (a *ArangoUpgradeContainer) GetName() string {
602611
}
603612

604613
// GetProbes returns no probes for the ArangoD upgrade container.
605-
func (a *ArangoUpgradeContainer) GetProbes() (*core.Probe, *core.Probe, error) {
606-
return nil, nil, nil
614+
func (a *ArangoUpgradeContainer) GetProbes() (*core.Probe, *core.Probe, *core.Probe, error) {
615+
return nil, nil, nil, nil
607616
}
608617

609618
// GetArgs returns list of arguments for the ArangoD version check container.
@@ -622,6 +631,6 @@ func (a *ArangoVersionCheckContainer) GetName() string {
622631
}
623632

624633
// GetProbes returns no probes for the ArangoD version check container.
625-
func (a *ArangoVersionCheckContainer) GetProbes() (*core.Probe, *core.Probe, error) {
626-
return nil, nil, nil
634+
func (a *ArangoVersionCheckContainer) GetProbes() (*core.Probe, *core.Probe, *core.Probe, error) {
635+
return nil, nil, nil, nil
627636
}

pkg/deployment/resources/pod_creator_probes.go

Lines changed: 149 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ type Probe interface {
4949
}
5050

5151
type probeCheckBuilder struct {
52-
liveness, readiness probeBuilder
52+
liveness, readiness, startup probeBuilder
5353
}
5454

5555
type probeBuilder func(spec api.DeploymentSpec, group api.ServerGroup, version driver.Version) (Probe, error)
@@ -118,6 +118,36 @@ func (r *Resources) getLivenessProbe(spec api.DeploymentSpec, group api.ServerGr
118118
return config, nil
119119
}
120120

121+
func (r *Resources) getStartupProbe(spec api.DeploymentSpec, group api.ServerGroup, version driver.Version) (Probe, error) {
122+
if !r.isStartupProbeEnabled(spec, group) {
123+
return nil, nil
124+
}
125+
126+
builders := r.probeBuilders()
127+
128+
builder, ok := builders[group]
129+
if !ok {
130+
return nil, nil
131+
}
132+
133+
config, err := builder.startup(spec, group, version)
134+
if err != nil {
135+
return nil, err
136+
}
137+
138+
groupSpec := spec.GetServerGroupSpec(group)
139+
140+
if !groupSpec.HasProbesSpec() {
141+
return config, nil
142+
}
143+
144+
probeSpec := groupSpec.GetProbesSpec()
145+
146+
config.SetSpec(probeSpec.StartupProbeSpec)
147+
148+
return config, nil
149+
}
150+
121151
func (r *Resources) isReadinessProbeEnabled(spec api.DeploymentSpec, group api.ServerGroup) bool {
122152
probe := pod.ReadinessSpec(group)
123153

@@ -146,29 +176,49 @@ func (r *Resources) isLivenessProbeEnabled(spec api.DeploymentSpec, group api.Se
146176
return probe.CanBeEnabled && probe.EnabledByDefault
147177
}
148178

179+
func (r *Resources) isStartupProbeEnabled(spec api.DeploymentSpec, group api.ServerGroup) bool {
180+
probe := pod.StartupSpec(group)
181+
182+
groupSpec := spec.GetServerGroupSpec(group)
183+
184+
if groupSpec.HasProbesSpec() {
185+
if p := groupSpec.GetProbesSpec().StartupProbeDisabled; p != nil {
186+
return !*p && probe.CanBeEnabled
187+
}
188+
}
189+
190+
return probe.CanBeEnabled && probe.EnabledByDefault
191+
}
192+
149193
func (r *Resources) probeBuilders() map[api.ServerGroup]probeCheckBuilder {
150194
return map[api.ServerGroup]probeCheckBuilder{
151195
api.ServerGroupSingle: {
196+
startup: r.probeBuilderStartupCoreSelect(),
152197
liveness: r.probeBuilderLivenessCoreSelect(),
153198
readiness: r.probeBuilderReadinessCoreSelect(),
154199
},
155200
api.ServerGroupAgents: {
201+
startup: r.probeBuilderStartupCoreSelect(),
156202
liveness: r.probeBuilderLivenessCoreSelect(),
157203
readiness: r.probeBuilderReadinessSimpleCoreSelect(),
158204
},
159205
api.ServerGroupDBServers: {
206+
startup: r.probeBuilderStartupCoreSelect(),
160207
liveness: r.probeBuilderLivenessCoreSelect(),
161208
readiness: r.probeBuilderReadinessSimpleCoreSelect(),
162209
},
163210
api.ServerGroupCoordinators: {
211+
startup: r.probeBuilderStartupCoreSelect(),
164212
liveness: r.probeBuilderLivenessCoreSelect(),
165213
readiness: r.probeBuilderReadinessCoreSelect(),
166214
},
167215
api.ServerGroupSyncMasters: {
216+
startup: r.probeBuilderStartupSync,
168217
liveness: r.probeBuilderLivenessSync,
169218
readiness: nilProbeBuilder,
170219
},
171220
api.ServerGroupSyncWorkers: {
221+
startup: r.probeBuilderStartupSync,
172222
liveness: r.probeBuilderLivenessSync,
173223
readiness: nilProbeBuilder,
174224
},
@@ -207,6 +257,14 @@ func (r *Resources) probeBuilderLivenessCoreSelect() probeBuilder {
207257
return r.probeBuilderLivenessCore
208258
}
209259

260+
func (r *Resources) probeBuilderStartupCoreSelect() probeBuilder {
261+
if features.JWTRotation().Enabled() {
262+
return r.probeBuilderStartupCoreOperator
263+
}
264+
265+
return r.probeBuilderStartupCore
266+
}
267+
210268
func (r *Resources) probeBuilderLivenessCoreOperator(spec api.DeploymentSpec, group api.ServerGroup, version driver.Version) (Probe, error) {
211269
args, err := r.probeCommand(spec, "/_api/version")
212270
if err != nil {
@@ -218,6 +276,29 @@ func (r *Resources) probeBuilderLivenessCoreOperator(spec api.DeploymentSpec, gr
218276
}, nil
219277
}
220278

279+
func (r *Resources) probeBuilderStartupCoreOperator(spec api.DeploymentSpec, group api.ServerGroup, version driver.Version) (Probe, error) {
280+
args, err := r.probeCommand(spec, "/_api/version")
281+
if err != nil {
282+
return nil, err
283+
}
284+
285+
var retries int32
286+
287+
switch group {
288+
case api.ServerGroupDBServers:
289+
retries = 6 * 60 * 60 / 5 // Wait 6 hours for wal replay
290+
default:
291+
retries = 60
292+
}
293+
294+
return &probes.CMDProbeConfig{
295+
Command: args,
296+
FailureThreshold: retries,
297+
PeriodSeconds: 5,
298+
InitialDelaySeconds: 1,
299+
}, nil
300+
}
301+
221302
func (r *Resources) probeBuilderLivenessCore(spec api.DeploymentSpec, group api.ServerGroup, version driver.Version) (Probe, error) {
222303
authorization := ""
223304
if spec.IsAuthenticated() {
@@ -237,6 +318,38 @@ func (r *Resources) probeBuilderLivenessCore(spec api.DeploymentSpec, group api.
237318
}, nil
238319
}
239320

321+
func (r *Resources) probeBuilderStartupCore(spec api.DeploymentSpec, group api.ServerGroup, version driver.Version) (Probe, error) {
322+
323+
var retries int32
324+
325+
switch group {
326+
case api.ServerGroupDBServers:
327+
retries = 6 * 60 * 60 / 5 // Wait 6 hours for wal replay
328+
default:
329+
retries = 60
330+
}
331+
332+
authorization := ""
333+
if spec.IsAuthenticated() {
334+
secretData, err := r.getJWTSecret(spec)
335+
if err != nil {
336+
return nil, errors.WithStack(err)
337+
}
338+
authorization, err = jwt.CreateArangodJwtAuthorizationHeaderAllowedPaths(secretData, "kube-arangodb", []string{"/_api/version"})
339+
if err != nil {
340+
return nil, errors.WithStack(err)
341+
}
342+
}
343+
return &probes.HTTPProbeConfig{
344+
LocalPath: "/_api/version",
345+
Secure: spec.IsSecure(),
346+
Authorization: authorization,
347+
FailureThreshold: retries,
348+
PeriodSeconds: 5,
349+
InitialDelaySeconds: 1,
350+
}, nil
351+
}
352+
240353
func (r *Resources) probeBuilderReadinessSimpleCoreSelect() probeBuilder {
241354
if features.JWTRotation().Enabled() {
242355
return r.probeBuilderReadinessSimpleCoreOperator
@@ -363,3 +476,38 @@ func (r *Resources) probeBuilderLivenessSync(spec api.DeploymentSpec, group api.
363476
Port: port,
364477
}, nil
365478
}
479+
480+
func (r *Resources) probeBuilderStartupSync(spec api.DeploymentSpec, group api.ServerGroup, version driver.Version) (Probe, error) {
481+
authorization := ""
482+
port := k8sutil.ArangoSyncMasterPort
483+
if group == api.ServerGroupSyncWorkers {
484+
port = k8sutil.ArangoSyncWorkerPort
485+
}
486+
if spec.Sync.Monitoring.GetTokenSecretName() != "" {
487+
// Use monitoring token
488+
token, err := r.getSyncMonitoringToken(spec)
489+
if err != nil {
490+
return nil, errors.WithStack(err)
491+
}
492+
authorization = "bearer " + token
493+
} else if group == api.ServerGroupSyncMasters {
494+
// Fall back to JWT secret
495+
secretData, err := r.getSyncJWTSecret(spec)
496+
if err != nil {
497+
return nil, errors.WithStack(err)
498+
}
499+
authorization, err = jwt.CreateArangodJwtAuthorizationHeaderAllowedPaths(secretData, "kube-arangodb", []string{"/_api/version"})
500+
if err != nil {
501+
return nil, errors.WithStack(err)
502+
}
503+
} else {
504+
// Don't have a probe
505+
return nil, nil
506+
}
507+
return &probes.HTTPProbeConfig{
508+
LocalPath: "/_api/version",
509+
Secure: spec.Sync.TLS.IsSecure(),
510+
Authorization: authorization,
511+
Port: port,
512+
}, nil
513+
}

pkg/deployment/resources/pod_creator_sync.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,17 +105,22 @@ func (a *ArangoSyncContainer) GetSecurityContext() *core.SecurityContext {
105105
return a.groupSpec.SecurityContext.NewSecurityContext()
106106
}
107107

108-
func (a *ArangoSyncContainer) GetProbes() (*core.Probe, *core.Probe, error) {
109-
var liveness, readiness *core.Probe
108+
func (a *ArangoSyncContainer) GetProbes() (*core.Probe, *core.Probe, *core.Probe, error) {
109+
var liveness, readiness, startup *core.Probe
110110

111111
probeLivenessConfig, err := a.resources.getLivenessProbe(a.spec, a.group, a.imageInfo.ArangoDBVersion)
112112
if err != nil {
113-
return nil, nil, err
113+
return nil, nil, nil, err
114114
}
115115

116116
probeReadinessConfig, err := a.resources.getReadinessProbe(a.spec, a.group, a.imageInfo.ArangoDBVersion)
117117
if err != nil {
118-
return nil, nil, err
118+
return nil, nil, nil, err
119+
}
120+
121+
// The startup probe must be built by getStartupProbe; asking for the readiness
// probe here would ignore the startup probe settings and builder entirely.
probeStartupConfig, err := a.resources.getStartupProbe(a.spec, a.group, a.imageInfo.ArangoDBVersion)
122+
if err != nil {
123+
return nil, nil, nil, err
119124
}
120125

121126
if probeLivenessConfig != nil {
@@ -126,7 +131,11 @@ func (a *ArangoSyncContainer) GetProbes() (*core.Probe, *core.Probe, error) {
126131
readiness = probeReadinessConfig.Create()
127132
}
128133

129-
return liveness, readiness, nil
134+
if probeStartupConfig != nil {
135+
startup = probeStartupConfig.Create()
136+
}
137+
138+
return liveness, readiness, startup, nil
130139
}
131140

132141
func (a *ArangoSyncContainer) GetResourceRequirements() core.ResourceRequirements {

0 commit comments

Comments
 (0)