Skip to content

Commit 0bdffbd

Browse files
authored
Merge pull request #102 from arangodb/tls-renewal
Added automatic renewal of TLS server certificates
2 parents cf09ebd + e94b2d5 commit 0bdffbd

File tree

9 files changed

+231
-12
lines changed

9 files changed

+231
-12
lines changed

pkg/apis/deployment/v1alpha/plan.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ const (
4545
ActionTypeUpgradeMember ActionType = "UpgradeMember"
4646
// ActionTypeWaitForMemberUp causes the plan to wait until the member is considered "up".
4747
ActionTypeWaitForMemberUp ActionType = "WaitForMemberUp"
48+
// ActionTypeRenewTLSCertificate causes the TLS certificate of a member to be renewed.
49+
ActionTypeRenewTLSCertificate ActionType = "RenewTLSCertificate"
4850
)
4951

5052
// Action represents a single action to be taken to update a deployment.

pkg/deployment/context_impl.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,3 +194,26 @@ func (d *Deployment) GetOwnedPods() ([]v1.Pod, error) {
194194
}
195195
return myPods, nil
196196
}
197+
198+
// GetTLSKeyfile returns the keyfile encoded TLS certificate+key for
199+
// the given member.
200+
func (d *Deployment) GetTLSKeyfile(group api.ServerGroup, member api.MemberStatus) (string, error) {
201+
secretName := k8sutil.CreateTLSKeyfileSecretName(d.apiObject.GetName(), group.AsRole(), member.ID)
202+
ns := d.apiObject.GetNamespace()
203+
result, err := k8sutil.GetTLSKeyfileSecret(d.deps.KubeCli.CoreV1(), secretName, ns)
204+
if err != nil {
205+
return "", maskAny(err)
206+
}
207+
return result, nil
208+
}
209+
210+
// DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member.
211+
// If the secret does not exist, the error is ignored.
212+
func (d *Deployment) DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error {
213+
secretName := k8sutil.CreateTLSKeyfileSecretName(d.apiObject.GetName(), group.AsRole(), member.ID)
214+
ns := d.apiObject.GetNamespace()
215+
if err := d.deps.KubeCli.CoreV1().Secrets(ns).Delete(secretName, &metav1.DeleteOptions{}); err != nil && !k8sutil.IsNotFound(err) {
216+
return maskAny(err)
217+
}
218+
return nil
219+
}

pkg/deployment/reconcile/action_context.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ type ActionContext interface {
6464
// DeletePvc deletes a persistent volume claim with given name in the namespace
6565
// of the deployment. If the pvc does not exist, the error is ignored.
6666
DeletePvc(pvcName string) error
67+
// DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member.
68+
// If the secret does not exist, the error is ignored.
69+
DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error
6770
}
6871

6972
// newActionContext creates a new ActionContext implementation.
@@ -181,3 +184,12 @@ func (ac *actionContext) DeletePvc(pvcName string) error {
181184
}
182185
return nil
183186
}
187+
188+
// DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member.
189+
// If the secret does not exist, the error is ignored.
190+
func (ac *actionContext) DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error {
191+
if err := ac.context.DeleteTLSKeyfile(group, member); err != nil {
192+
return maskAny(err)
193+
}
194+
return nil
195+
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
// Author Ewout Prangsma
21+
//
22+
23+
package reconcile
24+
25+
import (
26+
"context"
27+
28+
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
29+
"github.com/rs/zerolog"
30+
)
31+
32+
// NewRenewTLSCertificateAction creates a new Action that implements the given
33+
// planned RenewTLSCertificate action.
34+
func NewRenewTLSCertificateAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action {
35+
return &renewTLSCertificateAction{
36+
log: log,
37+
action: action,
38+
actionCtx: actionCtx,
39+
}
40+
}
41+
42+
// renewTLSCertificateAction implements a RenewTLSCertificate action.
// It bundles the logger, the planned action and the action context that
// the Start and CheckProgress methods work with.
43+
type renewTLSCertificateAction struct {
44+
// log is the logger used to report problems while running the action.
log zerolog.Logger
45+
// action is the planned action; it names the server group and member to act on.
action api.Action
46+
// actionCtx is used to look up the member and to delete its TLS keyfile secret.
actionCtx ActionContext
47+
}
48+
49+
// Start performs the start of the action.
50+
// Returns true if the action is completely finished, false in case
51+
// the start time needs to be recorded and a ready condition needs to be checked.
52+
func (a *renewTLSCertificateAction) Start(ctx context.Context) (bool, error) {
53+
log := a.log
54+
group := a.action.Group
55+
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
56+
if !ok {
57+
log.Error().Msg("No such member")
58+
}
59+
// Just delete the secret.
60+
// It will be re-created when the member restarts.
61+
if err := a.actionCtx.DeleteTLSKeyfile(group, m); err != nil {
62+
return false, maskAny(err)
63+
}
64+
return false, nil
65+
}
66+
67+
// CheckProgress checks the progress of the action.
68+
// Returns true if the action is completely finished, false otherwise.
69+
func (a *renewTLSCertificateAction) CheckProgress(ctx context.Context) (bool, error) {
70+
return true, nil
71+
}

pkg/deployment/reconcile/context.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,10 @@ type Context interface {
6363
DeletePvc(pvcName string) error
6464
// GetOwnedPods returns a list of all pods owned by the deployment.
6565
GetOwnedPods() ([]v1.Pod, error)
66+
// GetTLSKeyfile returns the keyfile encoded TLS certificate+key for
67+
// the given member.
68+
GetTLSKeyfile(group api.ServerGroup, member api.MemberStatus) (string, error)
69+
// DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member.
70+
// If the secret does not exist, the error is ignored.
71+
DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error
6672
}

pkg/deployment/reconcile/plan_builder.go

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,16 @@
2323
package reconcile
2424

2525
import (
26-
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
26+
"crypto/x509"
27+
"encoding/pem"
28+
"time"
29+
2730
"github.com/rs/zerolog"
2831
"github.com/rs/zerolog/log"
2932
"k8s.io/api/core/v1"
3033
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34+
35+
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
3136
)
3237

3338
// upgradeDecision is the result of an upgrade check.
@@ -52,7 +57,7 @@ func (d *Reconciler) CreatePlan() error {
5257
apiObject := d.context.GetAPIObject()
5358
spec := d.context.GetSpec()
5459
status := d.context.GetStatus()
55-
newPlan, changed := createPlan(d.log, apiObject, status.Plan, spec, status, pods)
60+
newPlan, changed := createPlan(d.log, apiObject, status.Plan, spec, status, pods, d.context.GetTLSKeyfile)
5661

5762
// If nothing changed, we're done
5863
if !changed {
@@ -76,7 +81,8 @@ func (d *Reconciler) CreatePlan() error {
7681
// Otherwise the new plan is returned with a boolean true.
7782
func createPlan(log zerolog.Logger, apiObject metav1.Object,
7883
currentPlan api.Plan, spec api.DeploymentSpec,
79-
status api.DeploymentStatus, pods []v1.Pod) (api.Plan, bool) {
84+
status api.DeploymentStatus, pods []v1.Pod,
85+
getTLSKeyfile func(group api.ServerGroup, member api.MemberStatus) (string, error)) (api.Plan, bool) {
8086
if len(currentPlan) > 0 {
8187
// Plan already exists, complete that first
8288
return currentPlan, false
@@ -158,6 +164,39 @@ func createPlan(log zerolog.Logger, apiObject metav1.Object,
158164
})
159165
}
160166

167+
// Check for the need to rotate the TLS certificate of a member
168+
if len(plan) == 0 && spec.TLS.IsSecure() {
169+
status.Members.ForeachServerGroup(func(group api.ServerGroup, members *api.MemberStatusList) error {
170+
for _, m := range *members {
171+
if len(plan) > 0 {
172+
// Only 1 change at a time
173+
continue
174+
}
175+
if m.Phase != api.MemberPhaseCreated {
176+
// Only make changes when phase is created
177+
continue
178+
}
179+
// Load keyfile
180+
keyfile, err := getTLSKeyfile(group, m)
181+
if err != nil {
182+
log.Warn().Err(err).
183+
Str("role", group.AsRole()).
184+
Str("id", m.ID).
185+
Msg("Failed to get TLS secret")
186+
continue
187+
}
188+
renewalNeeded := tlsKeyfileNeedsRenewal(log, keyfile)
189+
if renewalNeeded {
190+
plan = append(append(plan,
191+
api.NewAction(api.ActionTypeRenewTLSCertificate, group, m.ID)),
192+
createRotateMemberPlan(log, m, group, "TLS certificate renewal")...,
193+
)
194+
}
195+
}
196+
return nil
197+
})
198+
}
199+
161200
// Return plan
162201
return plan, true
163202
}
@@ -233,6 +272,44 @@ func podNeedsRotation(p v1.Pod, apiObject metav1.Object, spec api.DeploymentSpec
233272
return false, ""
234273
}
235274

275+
// tlsKeyfileNeedsRenewal decides if the certificate in the given keyfile
276+
// should be renewed.
277+
func tlsKeyfileNeedsRenewal(log zerolog.Logger, keyfile string) bool {
278+
raw := []byte(keyfile)
279+
for {
280+
var derBlock *pem.Block
281+
derBlock, raw = pem.Decode(raw)
282+
if derBlock == nil {
283+
break
284+
}
285+
if derBlock.Type == "CERTIFICATE" {
286+
cert, err := x509.ParseCertificate(derBlock.Bytes)
287+
if err != nil {
288+
// We do not understand the certificate, let's renew it
289+
log.Warn().Err(err).Msg("Failed to parse x509 certificate. Renewing it")
290+
return true
291+
}
292+
if cert.IsCA {
293+
// Only look at the server certificate, not CA or intermediate
294+
continue
295+
}
296+
// Check expiration date. Renewal at 2/3 of lifetime.
297+
ttl := cert.NotAfter.Sub(cert.NotBefore)
298+
expirationDate := cert.NotBefore.Add((ttl / 3) * 2)
299+
if expirationDate.Before(time.Now()) {
300+
// We should renew now
301+
log.Debug().
302+
Str("not-before", cert.NotBefore.String()).
303+
Str("not-after", cert.NotAfter.String()).
304+
Str("expiration-date", expirationDate.String()).
305+
Msg("TLS certificate renewal needed")
306+
return true
307+
}
308+
}
309+
}
310+
return false
311+
}
312+
236313
// createScalePlan creates a scaling plan for a single server group
237314
func createScalePlan(log zerolog.Logger, members api.MemberStatusList, group api.ServerGroup, count int) api.Plan {
238315
var plan api.Plan

pkg/deployment/reconcile/plan_builder_test.go

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
package reconcile
2424

2525
import (
26+
"fmt"
2627
"testing"
2728

2829
"github.com/rs/zerolog"
@@ -36,6 +37,9 @@ import (
3637

3738
// TestCreatePlanSingleScale creates a `single` deployment to test the creating of scaling plan.
3839
func TestCreatePlanSingleScale(t *testing.T) {
40+
getTLSKeyfile := func(group api.ServerGroup, member api.MemberStatus) (string, error) {
41+
return "", maskAny(fmt.Errorf("Not implemented"))
42+
}
3943
log := zerolog.Nop()
4044
spec := api.DeploymentSpec{
4145
Mode: api.NewMode(api.DeploymentModeSingle),
@@ -51,7 +55,7 @@ func TestCreatePlanSingleScale(t *testing.T) {
5155

5256
// Test with empty status
5357
var status api.DeploymentStatus
54-
newPlan, changed := createPlan(log, depl, nil, spec, status, nil)
58+
newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile)
5559
assert.True(t, changed)
5660
assert.Len(t, newPlan, 0) // Single mode does not scale
5761

@@ -62,7 +66,7 @@ func TestCreatePlanSingleScale(t *testing.T) {
6266
PodName: "something",
6367
},
6468
}
65-
newPlan, changed = createPlan(log, depl, nil, spec, status, nil)
69+
newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile)
6670
assert.True(t, changed)
6771
assert.Len(t, newPlan, 0) // Single mode does not scale
6872

@@ -77,13 +81,16 @@ func TestCreatePlanSingleScale(t *testing.T) {
7781
PodName: "something1",
7882
},
7983
}
80-
newPlan, changed = createPlan(log, depl, nil, spec, status, nil)
84+
newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile)
8185
assert.True(t, changed)
8286
assert.Len(t, newPlan, 0) // Single mode does not scale
8387
}
8488

8589
// TestCreatePlanResilientSingleScale creates a `resilientsingle` deployment to test the creating of scaling plan.
8690
func TestCreatePlanResilientSingleScale(t *testing.T) {
91+
getTLSKeyfile := func(group api.ServerGroup, member api.MemberStatus) (string, error) {
92+
return "", maskAny(fmt.Errorf("Not implemented"))
93+
}
8794
log := zerolog.Nop()
8895
spec := api.DeploymentSpec{
8996
Mode: api.NewMode(api.DeploymentModeResilientSingle),
@@ -100,7 +107,7 @@ func TestCreatePlanResilientSingleScale(t *testing.T) {
100107

101108
// Test with empty status
102109
var status api.DeploymentStatus
103-
newPlan, changed := createPlan(log, depl, nil, spec, status, nil)
110+
newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile)
104111
assert.True(t, changed)
105112
require.Len(t, newPlan, 2)
106113
assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type)
@@ -113,7 +120,7 @@ func TestCreatePlanResilientSingleScale(t *testing.T) {
113120
PodName: "something",
114121
},
115122
}
116-
newPlan, changed = createPlan(log, depl, nil, spec, status, nil)
123+
newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile)
117124
assert.True(t, changed)
118125
require.Len(t, newPlan, 1)
119126
assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type)
@@ -138,7 +145,7 @@ func TestCreatePlanResilientSingleScale(t *testing.T) {
138145
PodName: "something4",
139146
},
140147
}
141-
newPlan, changed = createPlan(log, depl, nil, spec, status, nil)
148+
newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile)
142149
assert.True(t, changed)
143150
require.Len(t, newPlan, 2) // Note: Downscaling is only down 1 at a time
144151
assert.Equal(t, api.ActionTypeShutdownMember, newPlan[0].Type)
@@ -149,6 +156,9 @@ func TestCreatePlanResilientSingleScale(t *testing.T) {
149156

150157
// TestCreatePlanClusterScale creates a `cluster` deployment to test the creating of scaling plan.
151158
func TestCreatePlanClusterScale(t *testing.T) {
159+
getTLSKeyfile := func(group api.ServerGroup, member api.MemberStatus) (string, error) {
160+
return "", maskAny(fmt.Errorf("Not implemented"))
161+
}
152162
log := zerolog.Nop()
153163
spec := api.DeploymentSpec{
154164
Mode: api.NewMode(api.DeploymentModeCluster),
@@ -164,7 +174,7 @@ func TestCreatePlanClusterScale(t *testing.T) {
164174

165175
// Test with empty status
166176
var status api.DeploymentStatus
167-
newPlan, changed := createPlan(log, depl, nil, spec, status, nil)
177+
newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile)
168178
assert.True(t, changed)
169179
require.Len(t, newPlan, 6) // Adding 3 dbservers & 3 coordinators (note: agents do not scale now)
170180
assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type)
@@ -197,7 +207,7 @@ func TestCreatePlanClusterScale(t *testing.T) {
197207
PodName: "coordinator1",
198208
},
199209
}
200-
newPlan, changed = createPlan(log, depl, nil, spec, status, nil)
210+
newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile)
201211
assert.True(t, changed)
202212
require.Len(t, newPlan, 3)
203213
assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type)
@@ -234,7 +244,7 @@ func TestCreatePlanClusterScale(t *testing.T) {
234244
}
235245
spec.DBServers.Count = util.NewInt(1)
236246
spec.Coordinators.Count = util.NewInt(1)
237-
newPlan, changed = createPlan(log, depl, nil, spec, status, nil)
247+
newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile)
238248
assert.True(t, changed)
239249
require.Len(t, newPlan, 5) // Note: Downscaling is done 1 at a time
240250
assert.Equal(t, api.ActionTypeCleanOutMember, newPlan[0].Type)

pkg/deployment/reconcile/plan_executor.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ func (d *Reconciler) createAction(ctx context.Context, log zerolog.Logger, actio
134134
return NewUpgradeMemberAction(log, action, actionCtx)
135135
case api.ActionTypeWaitForMemberUp:
136136
return NewWaitForMemberUpAction(log, action, actionCtx)
137+
case api.ActionTypeRenewTLSCertificate:
138+
return NewRenewTLSCertificateAction(log, action, actionCtx)
137139
default:
138140
panic(fmt.Sprintf("Unknown action type '%s'", action.Type))
139141
}

0 commit comments

Comments
 (0)