Skip to content

Commit 4c22a58

Browse files
committed
Create ServiceMonitor automatically.
1 parent 5c66073 commit 4c22a58

File tree

6 files changed

+107
-10
lines changed

6 files changed

+107
-10
lines changed

manifests/templates/deployment/rbac.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ rules:
2424
verbs: ["*"]
2525
- apiGroups: ["apiextensions.k8s.io"]
2626
resources: ["customresourcedefinitions"]
27-
verbs: ["get"]
27+
verbs: ["get", "list", "watch"]
2828
- apiGroups: [""]
2929
resources: ["pods", "services", "endpoints", "persistentvolumeclaims", "events", "secrets"]
3030
verbs: ["*"]

pkg/deployment/deployment.go

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"github.com/arangodb/arangosync/client"
3232
"github.com/rs/zerolog"
3333
"github.com/rs/zerolog/log"
34+
apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
3435
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3536
"k8s.io/client-go/kubernetes"
3637
"k8s.io/client-go/tools/record"
@@ -59,6 +60,7 @@ type Config struct {
5960
type Dependencies struct {
6061
Log zerolog.Logger
6162
KubeCli kubernetes.Interface
63+
KubeExtCli apiextensionsclient.Interface
6264
DatabaseCRCli versioned.Interface
6365
EventRecorder record.EventRecorder
6466
}
@@ -98,6 +100,7 @@ type Deployment struct {
98100
stopped int32
99101

100102
inspectTrigger trigger.Trigger
103+
inspectCRDTrigger trigger.Trigger
101104
updateDeploymentTrigger trigger.Trigger
102105
clientCache *clientCache
103106
recentInspectionErrors int
@@ -107,6 +110,7 @@ type Deployment struct {
107110
resources *resources.Resources
108111
chaosMonkey *chaos.Monkey
109112
syncClientCache client.ClientCache
113+
haveServiceMonitorCRD bool
110114
}
111115

112116
// New creates a new Deployment from the given API object.
@@ -136,6 +140,7 @@ func New(config Config, deps Dependencies, apiObject *api.ArangoDeployment) (*De
136140
go d.listenForPVCEvents(d.stopCh)
137141
go d.listenForSecretEvents(d.stopCh)
138142
go d.listenForServiceEvents(d.stopCh)
143+
go d.listenForCRDEvents(d.stopCh)
139144
if apiObject.Spec.GetMode() == api.DeploymentModeCluster {
140145
ci := newClusterScalingIntegration(d)
141146
d.clusterScalingIntegration = ci
@@ -202,8 +207,10 @@ func (d *Deployment) run() {
202207
}
203208

204209
// Create service monitor
205-
if err := d.resources.EnsureServiceMonitor(); err != nil {
206-
d.CreateEvent(k8sutil.NewErrorEvent("Failed to create service monitor", err, d.GetAPIObject()))
210+
if d.haveServiceMonitorCRD {
211+
if err := d.resources.EnsureServiceMonitor(); err != nil {
212+
d.CreateEvent(k8sutil.NewErrorEvent("Failed to create service monitor", err, d.GetAPIObject()))
213+
}
207214
}
208215

209216
// Create members
@@ -234,6 +241,8 @@ func (d *Deployment) run() {
234241
log.Info().Msg("start running...")
235242
}
236243

244+
d.lookForServiceMonitorCRD()
245+
237246
inspectionInterval := maxInspectionInterval
238247
for {
239248
select {
@@ -263,6 +272,8 @@ func (d *Deployment) run() {
263272
inspectionInterval = d.inspectDeployment(inspectionInterval)
264273
log.Debug().Str("interval", inspectionInterval.String()).Msg("...inspected deployment")
265274

275+
case <-d.inspectCRDTrigger.Done():
276+
d.lookForServiceMonitorCRD()
266277
case <-d.updateDeploymentTrigger.Done():
267278
inspectionInterval = minInspectionInterval
268279
if err := d.handleArangoDeploymentUpdatedEvent(); err != nil {
@@ -497,3 +508,29 @@ func (d *Deployment) isOwnerOf(obj metav1.Object) bool {
497508
}
498509
return ownerRefs[0].UID == d.apiObject.UID
499510
}
511+
512+
// lookForServiceMonitorCRD checks if there is a CRD for the ServiceMonitor
513+
// CR and sets the flag haveServiceMonitorCRD accordingly. This is called
514+
// once at creation time of the deployment and then always if the CRD
515+
// informer is triggered.
516+
func (d *Deployment) lookForServiceMonitorCRD() {
517+
_, err := d.deps.KubeExtCli.ApiextensionsV1beta1().CustomResourceDefinitions().Get("servicemonitors.monitoring.coreos.com", metav1.GetOptions{})
518+
log := d.deps.Log
519+
log.Debug().Msgf("looking for ServiceMonitor CRD...")
520+
if err == nil {
521+
if !d.haveServiceMonitorCRD {
522+
log.Info().Msgf("have discovered ServiceMonitor CRD")
523+
}
524+
d.haveServiceMonitorCRD = true
525+
d.triggerInspection()
526+
return
527+
} else if k8sutil.IsNotFound(err) {
528+
if d.haveServiceMonitorCRD {
529+
log.Info().Msgf("ServiceMonitor CRD no longer there")
530+
}
531+
d.haveServiceMonitorCRD = false
532+
return
533+
}
534+
log.Warn().Err(err).Msgf("error when looking for ServiceMonitor CRD")
535+
return
536+
}

pkg/deployment/deployment_inspector.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,13 @@ func (d *Deployment) inspectDeployment(lastInterval util.Interval) util.Interval
152152
hasError = true
153153
d.CreateEvent(k8sutil.NewErrorEvent("Service creation failed", err, d.apiObject))
154154
}
155+
if d.haveServiceMonitorCRD {
156+
if err := d.resources.EnsureServiceMonitor(); err != nil {
157+
hasError = true
158+
d.CreateEvent(k8sutil.NewErrorEvent("Service monitor creation failed", err, d.apiObject))
159+
}
160+
}
161+
155162
if err := d.resources.EnsurePVCs(); err != nil {
156163
hasError = true
157164
d.CreateEvent(k8sutil.NewErrorEvent("PVC creation failed", err, d.apiObject))
@@ -208,3 +215,8 @@ func (d *Deployment) inspectDeployment(lastInterval util.Interval) util.Interval
208215
func (d *Deployment) triggerInspection() {
209216
d.inspectTrigger.Trigger()
210217
}
218+
219+
// triggerCRDInspection fires inspectCRDTrigger so that the run loop re-checks for the ServiceMonitor CRD soon.
220+
func (d *Deployment) triggerCRDInspection() {
221+
d.inspectCRDTrigger.Trigger()
222+
}

pkg/deployment/informers.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package deployment
2424

2525
import (
2626
"k8s.io/api/core/v1"
27+
v1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
2728
"k8s.io/client-go/tools/cache"
2829

2930
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
@@ -197,3 +198,36 @@ func (d *Deployment) listenForServiceEvents(stopCh <-chan struct{}) {
197198

198199
rw.Run(stopCh)
199200
}
201+
202+
// listenForCRDEvents keep listening for changes in CRDs until the given channel is closed.
203+
func (d *Deployment) listenForCRDEvents(stopCh <-chan struct{}) {
204+
//getCRD := func(obj interface{}) (*v1beta1.CustomResourceDefinition, bool) {
205+
// crd, ok := obj.(*v1beta1.CustomResourceDefinition)
206+
// if !ok {
207+
// tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
208+
// if !ok {
209+
// return nil, false
210+
// }
211+
// crd, ok = tombstone.Obj.(*v1beta1.CustomResourceDefinition)
212+
// return crd, ok
213+
// }
214+
// return crd, true
215+
// }
216+
217+
rw := k8sutil.NewResourceWatcher(
218+
d.deps.Log,
219+
d.deps.KubeExtCli.ApiextensionsV1beta1().RESTClient(),
220+
"customresourcedefinitions",
221+
"",
222+
&v1beta1.CustomResourceDefinition{},
223+
cache.ResourceEventHandlerFuncs{
224+
AddFunc: func(obj interface{}) {
225+
d.triggerCRDInspection()
226+
},
227+
DeleteFunc: func(obj interface{}) {
228+
d.triggerCRDInspection()
229+
},
230+
})
231+
232+
rw.Run(stopCh)
233+
}

pkg/deployment/resources/servicemonitor.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,9 @@ func (r *Resources) EnsureServiceMonitor() error {
5555
ns := apiObject.GetNamespace()
5656
owner := apiObject.AsOwner()
5757
spec := r.context.GetSpec()
58-
if !spec.Metrics.IsEnabled() {
59-
return nil
60-
}
58+
wantMetrics := spec.Metrics.IsEnabled()
6159
serviceMonitorName := deploymentName + "-exporter"
60+
log.Debug().Msgf("EnsureServiceMonitor running %s", serviceMonitorName)
6261

6362
// First get a client:
6463
var restConfig *rest.Config
@@ -77,6 +76,9 @@ func (r *Resources) EnsureServiceMonitor() error {
7776
_, err = serviceMonitors.Get(serviceMonitorName, metav1.GetOptions{})
7877
if err != nil {
7978
if k8sutil.IsNotFound(err) {
79+
if !wantMetrics {
80+
return nil
81+
}
8082
// Need to create one:
8183
smon := &coreosv1.ServiceMonitor{
8284
ObjectMeta: metav1.ObjectMeta{
@@ -106,13 +108,24 @@ func (r *Resources) EnsureServiceMonitor() error {
106108
log.Error().Err(err).Msgf("Failed to create ServiceMonitor %s", serviceMonitorName)
107109
return maskAny(err)
108110
}
111+
log.Debug().Msgf("ServiceMonitor %s successfully created.", serviceMonitorName)
112+
return nil
109113
} else {
110114
log.Error().Err(err).Msgf("Failed to get ServiceMonitor %s", serviceMonitorName)
111115
return maskAny(err)
112116
}
113117
}
114-
115-
log.Debug().Msgf("ServiceMonitor %s already found, no need to create.",
116-
serviceMonitorName)
117-
return nil
118+
if wantMetrics {
119+
log.Debug().Msgf("ServiceMonitor %s already found, no need to create.",
120+
serviceMonitorName)
121+
return nil
122+
}
123+
// Need to get rid of the ServiceMonitor:
124+
err = serviceMonitors.Delete(serviceMonitorName, &metav1.DeleteOptions{})
125+
if err == nil {
126+
log.Debug().Msgf("Deleted ServiceMonitor %s", serviceMonitorName)
127+
return nil
128+
}
129+
log.Error().Err(err).Msgf("Could not delete ServiceMonitor %s.", serviceMonitorName)
130+
return maskAny(err)
118131
}

pkg/operator/operator_deployment.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ func (o *Operator) makeDeploymentConfigAndDeps(apiObject *api.ArangoDeployment)
212212
Str("deployment", apiObject.GetName()).
213213
Logger(),
214214
KubeCli: o.Dependencies.KubeCli,
215+
KubeExtCli: o.Dependencies.KubeExtCli,
215216
DatabaseCRCli: o.Dependencies.CRCli,
216217
EventRecorder: o.Dependencies.EventRecorder,
217218
}

0 commit comments

Comments
 (0)