Skip to content

Commit 7cfeb00

Browse files
authored
Merge pull request #115 from arangodb/improved-ready-detection
Improved liveness detection
2 parents 163845d + dcf854b commit 7cfeb00

File tree

9 files changed

+217
-42
lines changed

9 files changed

+217
-42
lines changed

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,9 @@ run-unit-tests: $(GOBUILDDIR) $(SOURCES)
226226
$(REPOPATH)/pkg/deployment/reconcile \
227227
$(REPOPATH)/pkg/deployment/resources \
228228
$(REPOPATH)/pkg/util/k8sutil \
229-
$(REPOPATH)/pkg/util/k8sutil/test
229+
$(REPOPATH)/pkg/util/k8sutil/test \
230+
$(REPOPATH)/pkg/util/probe \
231+
$(REPOPATH)/pkg/util/validation
230232

231233
$(TESTBIN): $(GOBUILDDIR) $(SOURCES)
232234
@mkdir -p $(BINDIR)

main.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,9 @@ var (
8585
chaosOptions struct {
8686
allowed bool
8787
}
88-
deploymentProbe probe.Probe
89-
storageProbe probe.Probe
88+
livenessProbe probe.LivenessProbe
89+
deploymentProbe probe.ReadyProbe
90+
storageProbe probe.ReadyProbe
9091
)
9192

9293
func init() {
@@ -154,7 +155,7 @@ func cmdMainRun(cmd *cobra.Command, args []string) {
154155
}
155156

156157
mux := http.NewServeMux()
157-
mux.HandleFunc("/health", probe.LivenessHandler)
158+
mux.HandleFunc("/health", livenessProbe.LivenessHandler)
158159
mux.HandleFunc("/ready/deployment", deploymentProbe.ReadyHandler)
159160
mux.HandleFunc("/ready/storage", storageProbe.ReadyHandler)
160161
mux.Handle("/metrics", prometheus.Handler())
@@ -222,6 +223,7 @@ func newOperatorConfigAndDeps(id, namespace, name string) (operator.Config, oper
222223
KubeExtCli: kubeExtCli,
223224
CRCli: crCli,
224225
EventRecorder: eventRecorder,
226+
LivenessProbe: &livenessProbe,
225227
DeploymentProbe: &deploymentProbe,
226228
StorageProbe: &storageProbe,
227229
}

pkg/operator/operator.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,9 @@ type Dependencies struct {
7777
KubeExtCli apiextensionsclient.Interface
7878
CRCli versioned.Interface
7979
EventRecorder record.EventRecorder
80-
DeploymentProbe *probe.Probe
81-
StorageProbe *probe.Probe
80+
LivenessProbe *probe.LivenessProbe
81+
DeploymentProbe *probe.ReadyProbe
82+
StorageProbe *probe.ReadyProbe
8283
}
8384

8485
// NewOperator instantiates a new operator from given config & dependencies.

pkg/operator/operator_deployment.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ func (o *Operator) runDeployments(stop <-chan struct{}) {
6464

6565
// onAddArangoDeployment deployment addition callback
6666
func (o *Operator) onAddArangoDeployment(obj interface{}) {
67+
o.Dependencies.LivenessProbe.Lock()
68+
defer o.Dependencies.LivenessProbe.Unlock()
69+
6770
apiObject := obj.(*api.ArangoDeployment)
6871
o.log.Debug().
6972
Str("name", apiObject.GetObjectMeta().GetName()).
@@ -73,6 +76,9 @@ func (o *Operator) onAddArangoDeployment(obj interface{}) {
7376

7477
// onUpdateArangoDeployment deployment update callback
7578
func (o *Operator) onUpdateArangoDeployment(oldObj, newObj interface{}) {
79+
o.Dependencies.LivenessProbe.Lock()
80+
defer o.Dependencies.LivenessProbe.Unlock()
81+
7682
apiObject := newObj.(*api.ArangoDeployment)
7783
o.log.Debug().
7884
Str("name", apiObject.GetObjectMeta().GetName()).
@@ -82,6 +88,9 @@ func (o *Operator) onUpdateArangoDeployment(oldObj, newObj interface{}) {
8288

8389
// onDeleteArangoDeployment deployment delete callback
8490
func (o *Operator) onDeleteArangoDeployment(obj interface{}) {
91+
o.Dependencies.LivenessProbe.Lock()
92+
defer o.Dependencies.LivenessProbe.Unlock()
93+
8594
log := o.log
8695
apiObject, ok := obj.(*api.ArangoDeployment)
8796
if !ok {

pkg/operator/operator_local_storage.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ func (o *Operator) runLocalStorages(stop <-chan struct{}) {
6464

6565
// onAddArangoLocalStorage local storage addition callback
6666
func (o *Operator) onAddArangoLocalStorage(obj interface{}) {
67+
o.Dependencies.LivenessProbe.Lock()
68+
defer o.Dependencies.LivenessProbe.Unlock()
69+
6770
apiObject := obj.(*api.ArangoLocalStorage)
6871
o.log.Debug().
6972
Str("name", apiObject.GetObjectMeta().GetName()).
@@ -73,6 +76,9 @@ func (o *Operator) onAddArangoLocalStorage(obj interface{}) {
7376

7477
// onUpdateArangoLocalStorage local storage update callback
7578
func (o *Operator) onUpdateArangoLocalStorage(oldObj, newObj interface{}) {
79+
o.Dependencies.LivenessProbe.Lock()
80+
defer o.Dependencies.LivenessProbe.Unlock()
81+
7682
apiObject := newObj.(*api.ArangoLocalStorage)
7783
o.log.Debug().
7884
Str("name", apiObject.GetObjectMeta().GetName()).
@@ -82,6 +88,9 @@ func (o *Operator) onUpdateArangoLocalStorage(oldObj, newObj interface{}) {
8288

8389
// onDeleteArangoLocalStorage local storage delete callback
8490
func (o *Operator) onDeleteArangoLocalStorage(obj interface{}) {
91+
o.Dependencies.LivenessProbe.Lock()
92+
defer o.Dependencies.LivenessProbe.Unlock()
93+
8594
log := o.log
8695
apiObject, ok := obj.(*api.ArangoLocalStorage)
8796
if !ok {

pkg/util/probe/health.go

Lines changed: 0 additions & 32 deletions
This file was deleted.

pkg/util/probe/liveness.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
// Author Ewout Prangsma
21+
//
22+
23+
package probe
24+
25+
import (
26+
"net/http"
27+
"sync"
28+
"time"
29+
)
30+
31+
const (
32+
livenessHandlerTimeout = time.Second * 5
33+
)
34+
35+
// LivenessProbe wraps a liveness probe handler.
36+
type LivenessProbe struct {
37+
lock int32
38+
mutex sync.Mutex
39+
waitChan chan struct{}
40+
}
41+
42+
// Lock the probe, preventing the LivenessHandler from responding to requests.
43+
func (p *LivenessProbe) Lock() {
44+
p.mutex.Lock()
45+
defer p.mutex.Unlock()
46+
47+
p.lock++
48+
}
49+
50+
// Unlock the probe, allowing the LivenessHandler to respond to requests.
51+
func (p *LivenessProbe) Unlock() {
52+
p.mutex.Lock()
53+
defer p.mutex.Unlock()
54+
55+
p.lock--
56+
57+
if p.lock == 0 && p.waitChan != nil {
58+
w := p.waitChan
59+
p.waitChan = nil
60+
close(w)
61+
}
62+
}
63+
64+
// waitUntilNotLocked blocks until the probe is no longer locked
65+
// or a timeout occurs.
66+
// Returns true if the probe is unlocked, false on timeout.
67+
func (p *LivenessProbe) waitUntilNotLocked(timeout time.Duration) bool {
68+
deadline := time.Now().Add(timeout)
69+
for {
70+
var w chan struct{}
71+
p.mutex.Lock()
72+
locked := p.lock != 0
73+
if locked {
74+
if p.waitChan == nil {
75+
p.waitChan = make(chan struct{})
76+
}
77+
w = p.waitChan
78+
}
79+
p.mutex.Unlock()
80+
if !locked {
81+
// All good
82+
return true
83+
}
84+
// We're locked, wait until w is closed
85+
select {
86+
case <-w:
87+
// continue
88+
case <-time.After(time.Until(deadline)):
89+
// Timeout
90+
return false
91+
}
92+
}
93+
}
94+
95+
// LivenessHandler writes back the HTTP status code 200 if the operator is ready, and 500 otherwise.
96+
func (p *LivenessProbe) LivenessHandler(w http.ResponseWriter, r *http.Request) {
97+
if p.waitUntilNotLocked(livenessHandlerTimeout) {
98+
w.WriteHeader(http.StatusOK)
99+
} else {
100+
w.WriteHeader(http.StatusInternalServerError)
101+
}
102+
}

pkg/util/probe/liveness_test.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
// Author Ewout Prangsma
21+
//
22+
23+
package probe
24+
25+
import (
26+
"sync"
27+
"testing"
28+
"time"
29+
30+
"github.com/stretchr/testify/assert"
31+
)
32+
33+
func TestLivenessLock(t *testing.T) {
34+
p := &LivenessProbe{}
35+
assert.True(t, p.waitUntilNotLocked(time.Millisecond))
36+
37+
// Test single lock
38+
p.Lock()
39+
assert.False(t, p.waitUntilNotLocked(time.Millisecond))
40+
p.Unlock()
41+
assert.True(t, p.waitUntilNotLocked(time.Millisecond))
42+
43+
// Test multiple locks
44+
p.Lock()
45+
assert.False(t, p.waitUntilNotLocked(time.Millisecond))
46+
p.Lock()
47+
assert.False(t, p.waitUntilNotLocked(time.Millisecond))
48+
p.Unlock()
49+
assert.False(t, p.waitUntilNotLocked(time.Millisecond))
50+
p.Unlock()
51+
assert.True(t, p.waitUntilNotLocked(time.Millisecond))
52+
53+
// Test concurrent waits
54+
wg := sync.WaitGroup{}
55+
p.Lock()
56+
wg.Add(1)
57+
go func() {
58+
// Waiter 1
59+
defer wg.Done()
60+
assert.True(t, p.waitUntilNotLocked(time.Millisecond*200))
61+
}()
62+
wg.Add(1)
63+
go func() {
64+
// Waiter 2
65+
defer wg.Done()
66+
assert.True(t, p.waitUntilNotLocked(time.Millisecond*200))
67+
}()
68+
wg.Add(1)
69+
go func() {
70+
// Waiter 3
71+
defer wg.Done()
72+
assert.False(t, p.waitUntilNotLocked(time.Millisecond*5))
73+
}()
74+
wg.Add(1)
75+
go func() {
76+
// Unlocker
77+
defer wg.Done()
78+
time.Sleep(time.Millisecond * 50)
79+
p.Unlock()
80+
}()
81+
wg.Wait()
82+
}

pkg/util/probe/ready.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,18 +27,18 @@ import (
2727
"sync/atomic"
2828
)
2929

30-
// Probe wraps a readiness probe handler.
31-
type Probe struct {
30+
// ReadyProbe wraps a readiness probe handler.
31+
type ReadyProbe struct {
3232
ready int32
3333
}
3434

3535
// SetReady marks the probe as ready.
36-
func (p *Probe) SetReady() {
36+
func (p *ReadyProbe) SetReady() {
3737
atomic.StoreInt32(&p.ready, 1)
3838
}
3939

4040
// ReadyHandler writes back the HTTP status code 200 if the operator is ready, and 500 otherwise.
41-
func (p *Probe) ReadyHandler(w http.ResponseWriter, r *http.Request) {
41+
func (p *ReadyProbe) ReadyHandler(w http.ResponseWriter, r *http.Request) {
4242
isReady := atomic.LoadInt32(&p.ready) != 0
4343
if isReady {
4444
w.WriteHeader(http.StatusOK)

0 commit comments

Comments
 (0)