Skip to content

Commit e631b5c

Browse files
authored
Merge pull request #176 from arangodb/bugfix/service-endpoints
Improved readiness probe, database services only use ready pods
2 parents 812b861 + c5c6f3b commit e631b5c

File tree

7 files changed

+88
-34
lines changed

7 files changed

+88
-34
lines changed

pkg/apis/deployment/v1alpha/deployment_status_members.go

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
2323
package v1alpha
2424

2525
import (
26-
"fmt"
27-
2826
"github.com/pkg/errors"
2927
)
3028

@@ -213,17 +211,25 @@ func (ds *DeploymentStatusMembers) RemoveByID(id string, group ServerGroup) erro
213211
return nil
214212
}
215213

216-
// AllMembersReady returns true when all members are in the Ready state.
217-
func (ds DeploymentStatusMembers) AllMembersReady() bool {
218-
if err := ds.ForeachServerGroup(func(group ServerGroup, list MemberStatusList) error {
219-
for _, x := range list {
220-
if !x.Conditions.IsTrue(ConditionTypeReady) {
221-
return fmt.Errorf("not ready")
222-
}
214+
// AllMembersReady returns true when all members, that must be ready for the given mode, are in the Ready state.
215+
func (ds DeploymentStatusMembers) AllMembersReady(mode DeploymentMode, syncEnabled bool) bool {
216+
syncReady := func() bool {
217+
if syncEnabled {
218+
return ds.SyncMasters.AllMembersReady() && ds.SyncWorkers.AllMembersReady()
223219
}
224-
return nil
225-
}); err != nil {
220+
return true
221+
}
222+
switch mode {
223+
case DeploymentModeSingle:
224+
return ds.Single.MembersReady() > 0
225+
case DeploymentModeActiveFailover:
226+
return ds.Agents.AllMembersReady() && ds.Single.MembersReady() > 0
227+
case DeploymentModeCluster:
228+
return ds.Agents.AllMembersReady() &&
229+
ds.DBServers.AllMembersReady() &&
230+
ds.Coordinators.AllMembersReady() &&
231+
syncReady()
232+
default:
226233
return false
227234
}
228-
return true
229235
}

pkg/apis/deployment/v1alpha/member_status_list.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,19 @@ func (l MemberStatusList) SelectMemberToRemove() (MemberStatus, error) {
134134
}
135135
return MemberStatus{}, maskAny(errors.Wrap(NotFoundError, "No member available for removal"))
136136
}
137+
138+
// MembersReady returns the number of members that are in the Ready state.
139+
func (l MemberStatusList) MembersReady() int {
140+
readyCount := 0
141+
for _, x := range l {
142+
if x.Conditions.IsTrue(ConditionTypeReady) {
143+
readyCount++
144+
}
145+
}
146+
return readyCount
147+
}
148+
149+
// AllMembersReady returns the true if all members are in the Ready state.
150+
func (l MemberStatusList) AllMembersReady() bool {
151+
return len(l) == l.MembersReady()
152+
}

pkg/deployment/resources/pod_creator.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ func (r *Resources) createLivenessProbe(spec api.DeploymentSpec, group api.Serve
359359

360360
// createReadinessProbe creates configuration for a readiness probe of a server in the given group.
361361
func (r *Resources) createReadinessProbe(spec api.DeploymentSpec, group api.ServerGroup) (*k8sutil.HTTPProbeConfig, error) {
362-
if group != api.ServerGroupCoordinators {
362+
if group != api.ServerGroupSingle && group != api.ServerGroupCoordinators {
363363
return nil, nil
364364
}
365365
authorization := ""
@@ -373,11 +373,18 @@ func (r *Resources) createReadinessProbe(spec api.DeploymentSpec, group api.Serv
373373
return nil, maskAny(err)
374374
}
375375
}
376-
return &k8sutil.HTTPProbeConfig{
377-
LocalPath: "/_api/version",
378-
Secure: spec.IsSecure(),
379-
Authorization: authorization,
380-
}, nil
376+
probeCfg := &k8sutil.HTTPProbeConfig{
377+
LocalPath: "/_api/version",
378+
Secure: spec.IsSecure(),
379+
Authorization: authorization,
380+
InitialDelaySeconds: 2,
381+
PeriodSeconds: 2,
382+
}
383+
switch spec.GetMode() {
384+
case api.DeploymentModeActiveFailover:
385+
probeCfg.LocalPath = "/_admin/echo"
386+
}
387+
return probeCfg, nil
381388
}
382389

383390
// createPodFinalizers creates a list of finalizers for a pod created for the given group.

pkg/deployment/resources/pod_inspector.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,8 @@ func (r *Resources) InspectPods(ctx context.Context) error {
217217
})
218218

219219
// Update overall conditions
220-
allMembersReady := status.Members.AllMembersReady()
220+
spec := r.context.GetSpec()
221+
allMembersReady := status.Members.AllMembersReady(spec.GetMode(), spec.Sync.IsEnabled())
221222
status.Conditions.Update(api.ConditionTypeReady, allMembersReady, "", "")
222223

223224
// Update conditions

pkg/util/k8sutil/probes.go

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,16 @@ type HTTPProbeConfig struct {
3737
Authorization string
3838
// Port to inspect (defaults to ArangoPort)
3939
Port int
40+
// Number of seconds after the container has started before liveness probes are initiated (defaults to 30)
41+
InitialDelaySeconds int32
42+
// Number of seconds after which the probe times out (defaults to 2).
43+
TimeoutSeconds int32
44+
// How often (in seconds) to perform the probe (defaults to 10).
45+
PeriodSeconds int32
46+
// Minimum consecutive successes for the probe to be considered successful after having failed (defaults to 1).
47+
SuccessThreshold int32
48+
// Minimum consecutive failures for the probe to be considered failed after having succeeded (defaults to 3).
49+
FailureThreshold int32
4050
}
4151

4252
// Create creates a probe from given config
@@ -52,23 +62,25 @@ func (config HTTPProbeConfig) Create() *v1.Probe {
5262
Value: config.Authorization,
5363
})
5464
}
55-
port := config.Port
56-
if port == 0 {
57-
port = ArangoPort
65+
def := func(value, defaultValue int32) int32 {
66+
if value != 0 {
67+
return value
68+
}
69+
return defaultValue
5870
}
5971
return &v1.Probe{
6072
Handler: v1.Handler{
6173
HTTPGet: &v1.HTTPGetAction{
6274
Path: config.LocalPath,
63-
Port: intstr.FromInt(port),
75+
Port: intstr.FromInt(int(def(int32(config.Port), ArangoPort))),
6476
Scheme: scheme,
6577
HTTPHeaders: headers,
6678
},
6779
},
68-
InitialDelaySeconds: 30, // Wait 30s before first probe
69-
TimeoutSeconds: 2, // Timeout of each probe is 2s
70-
PeriodSeconds: 10, // Interval between probes is 10s
71-
SuccessThreshold: 1, // Single probe is enough to indicate success
72-
FailureThreshold: 3, // Need 3 failed probes to consider a failed state
80+
InitialDelaySeconds: def(config.InitialDelaySeconds, 30), // Wait 30s before first probe
81+
TimeoutSeconds: def(config.TimeoutSeconds, 2), // Timeout of each probe is 2s
82+
PeriodSeconds: def(config.PeriodSeconds, 10), // Interval between probes is 10s
83+
SuccessThreshold: def(config.SuccessThreshold, 1), // Single probe is enough to indicate success
84+
FailureThreshold: def(config.FailureThreshold, 3), // Need 3 failed probes to consider a failed state
7385
}
7486
}

pkg/util/k8sutil/probes_test.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ func TestCreate(t *testing.T) {
3434
secret := "the secret"
3535

3636
// http
37-
config := HTTPProbeConfig{path, false, secret, 0}
37+
config := HTTPProbeConfig{path, false, secret, 0, 0, 0, 0, 0, 0}
3838
probe := config.Create()
3939

4040
assert.Equal(t, probe.InitialDelaySeconds, int32(30))
@@ -50,8 +50,18 @@ func TestCreate(t *testing.T) {
5050
assert.Equal(t, probe.Handler.HTTPGet.Scheme, v1.URISchemeHTTP)
5151

5252
// https
53-
config = HTTPProbeConfig{path, true, secret, 0}
53+
config = HTTPProbeConfig{path, true, secret, 0, 0, 0, 0, 0, 0}
5454
probe = config.Create()
5555

5656
assert.Equal(t, probe.Handler.HTTPGet.Scheme, v1.URISchemeHTTPS)
57+
58+
// http, custom timing
59+
config = HTTPProbeConfig{path, false, secret, 0, 1, 2, 3, 4, 5}
60+
probe = config.Create()
61+
62+
assert.Equal(t, probe.InitialDelaySeconds, int32(1))
63+
assert.Equal(t, probe.TimeoutSeconds, int32(2))
64+
assert.Equal(t, probe.PeriodSeconds, int32(3))
65+
assert.Equal(t, probe.SuccessThreshold, int32(4))
66+
assert.Equal(t, probe.FailureThreshold, int32(5))
5767
}

pkg/util/k8sutil/services.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
package k8sutil
2424

2525
import (
26+
"strconv"
27+
2628
"k8s.io/api/core/v1"
2729
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2830
"k8s.io/client-go/kubernetes"
@@ -67,7 +69,7 @@ func CreateHeadlessService(kubecli kubernetes.Interface, deployment metav1.Objec
6769
Port: ArangoPort,
6870
},
6971
}
70-
publishNotReadyAddresses := false
72+
publishNotReadyAddresses := true
7173
serviceType := v1.ServiceTypeClusterIP
7274
newlyCreated, err := createService(kubecli, svcName, deploymentName, deployment.GetNamespace(), ClusterIPNone, "", serviceType, ports, "", publishNotReadyAddresses, owner)
7375
if err != nil {
@@ -96,8 +98,8 @@ func CreateDatabaseClientService(kubecli kubernetes.Interface, deployment metav1
9698
} else {
9799
role = "coordinator"
98100
}
99-
publishNotReadyAddresses := true
100101
serviceType := v1.ServiceTypeClusterIP
102+
publishNotReadyAddresses := false
101103
newlyCreated, err := createService(kubecli, svcName, deploymentName, deployment.GetNamespace(), "", role, serviceType, ports, "", publishNotReadyAddresses, owner)
102104
if err != nil {
103105
return "", false, maskAny(err)
@@ -119,7 +121,7 @@ func CreateExternalAccessService(kubecli kubernetes.Interface, svcName, role str
119121
NodePort: int32(nodePort),
120122
},
121123
}
122-
publishNotReadyAddresses := true
124+
publishNotReadyAddresses := false
123125
newlyCreated, err := createService(kubecli, svcName, deploymentName, deployment.GetNamespace(), "", role, serviceType, ports, loadBalancerIP, publishNotReadyAddresses, owner)
124126
if err != nil {
125127
return "", false, maskAny(err)
@@ -142,7 +144,7 @@ func createService(kubecli kubernetes.Interface, svcName, deploymentName, ns, cl
142144
// This annotation is deprecated, PublishNotReadyAddresses is
143145
// used instead. We leave the annotation in for a while.
144146
// See https://github.com/kubernetes/kubernetes/pull/49061
145-
TolerateUnreadyEndpointsAnnotation: "true",
147+
TolerateUnreadyEndpointsAnnotation: strconv.FormatBool(publishNotReadyAddresses),
146148
},
147149
},
148150
Spec: v1.ServiceSpec{

0 commit comments

Comments
 (0)