@@ -27,26 +27,35 @@ import (
2727 "time"
2828
2929 api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
30+ "github.com/arangodb/kube-arangodb/pkg/metrics"
31+ "github.com/arangodb/kube-arangodb/pkg/util"
3032 "github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
3133 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3234)
3335
// inspectDeploymentDurationGauges is a gauge vector labelled by deployment name
// (metrics.DeploymentName). inspectDeployment updates the gauge for its own
// deployment through a deferred metrics.SetDuration call, passing the time at
// which the inspection started.
// NOTE(review): the "Must" prefix suggests registration panics on failure —
// confirm against the metrics package.
36+ var (
37+ inspectDeploymentDurationGauges = metrics .MustRegisterGaugeVec (metricsComponent , "inspect_deployment_duration" , "Amount of time taken by a single inspection of a deployment (in sec)" , metrics .DeploymentName )
38+ )
39+
3440// inspectDeployment inspects the entire deployment, creates
3541// a plan to update if needed and inspects underlying resources.
3642// This function should be called when:
3743// - the deployment has changed
3844// - any of the underlying resources has changed
3945// - once in a while
4046// Returns the delay until this function should be called again.
41- func (d * Deployment ) inspectDeployment (lastInterval time. Duration ) time. Duration {
47+ func (d * Deployment ) inspectDeployment (lastInterval util. Interval ) util. Interval {
4248 log := d .deps .Log
49+ start := time .Now ()
4350
4451 nextInterval := lastInterval
4552 hasError := false
4653 ctx := context .Background ()
54+ deploymentName := d .apiObject .GetName ()
55+ defer metrics .SetDuration (inspectDeploymentDurationGauges .WithLabelValues (deploymentName ), start )
4756
4857 // Check deployment still exists
49- updated , err := d .deps .DatabaseCRCli .DatabaseV1alpha ().ArangoDeployments (d .apiObject .GetNamespace ()).Get (d . apiObject . GetName () , metav1.GetOptions {})
58+ updated , err := d .deps .DatabaseCRCli .DatabaseV1alpha ().ArangoDeployments (d .apiObject .GetNamespace ()).Get (deploymentName , metav1.GetOptions {})
5059 if k8sutil .IsNotFound (err ) {
5160 // Deployment is gone
5261 log .Info ().Msg ("Deployment is gone" )
@@ -87,13 +96,17 @@ func (d *Deployment) inspectDeployment(lastInterval time.Duration) time.Duration
8796 }
8897
8998 // Inspection of generated resources needed
90- if err := d .resources .InspectPods (ctx ); err != nil {
99+ if x , err := d .resources .InspectPods (ctx ); err != nil {
91100 hasError = true
92101 d .CreateEvent (k8sutil .NewErrorEvent ("Pod inspection failed" , err , d .apiObject ))
102+ } else {
103+ nextInterval = nextInterval .ReduceTo (x )
93104 }
94- if err := d .resources .InspectPVCs (ctx ); err != nil {
105+ if x , err := d .resources .InspectPVCs (ctx ); err != nil {
95106 hasError = true
96107 d .CreateEvent (k8sutil .NewErrorEvent ("PVC inspection failed" , err , d .apiObject ))
108+ } else {
109+ nextInterval = nextInterval .ReduceTo (x )
97110 }
98111
99112 // Check members for resilience
@@ -149,9 +162,11 @@ func (d *Deployment) inspectDeployment(lastInterval time.Duration) time.Duration
149162 }
150163
151164 // At the end of the inspection, we clean up terminated pods.
152- if err := d .resources .CleanupTerminatedPods (); err != nil {
165+ if x , err := d .resources .CleanupTerminatedPods (); err != nil {
153166 hasError = true
154167 d .CreateEvent (k8sutil .NewErrorEvent ("Pod cleanup failed" , err , d .apiObject ))
168+ } else {
169+ nextInterval = nextInterval .ReduceTo (x )
155170 }
156171 }
157172
@@ -164,10 +179,7 @@ func (d *Deployment) inspectDeployment(lastInterval time.Duration) time.Duration
164179 } else {
165180 d .recentInspectionErrors = 0
166181 }
167- if nextInterval > maxInspectionInterval {
168- nextInterval = maxInspectionInterval
169- }
170- return nextInterval
182+ return nextInterval .ReduceTo (maxInspectionInterval )
171183}
172184
173185// triggerInspection ensures that an inspection is run soon.
0 commit comments