Skip to content

Commit 16b813d

Browse files
committed
feat: add configurable initial delay with jitter to CAS backend validation
Add InitialDelay field to CASBackendCheckerOpts to allow configuring the initial delay before the first validation. This prevents thundering herd issues when multiple pods start simultaneously.

Implementation:
- CAS backend checker now accepts InitialDelay via options
- Default behavior: 1 minute base delay + 0-5 minutes jitter (calculated in main.go)
- Changed OnlyDefaults from bool to *bool to distinguish between explicit false and default true
- Validation runs after initial delay, then continues with periodic checks

This ensures validation happens in the background without affecting boot performance and spreads validation load across pods during rolling deployments.

Signed-off-by: Miguel Martinez <miguel@chainloop.dev>
1 parent 73d16fe commit 16b813d

File tree

2 files changed

+45
-12
lines changed

2 files changed

+45
-12
lines changed

app/controlplane/cmd/main.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//
2-
// Copyright 2024 The Chainloop Authors.
2+
// Copyright 2024-2025 The Chainloop Authors.
33
//
44
// Licensed under the Apache License, Version 2.0 (the "License");
55
// you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@ package main
1818
import (
1919
"context"
2020
"fmt"
21+
"math/rand"
2122
_ "net/http/pprof"
2223
"os"
2324
"time"
@@ -169,7 +170,20 @@ func main() {
169170

170171
// Start the background CAS Backend checker
171172
if app.casBackendChecker != nil {
172-
go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{CheckInterval: 30 * time.Minute})
173+
// Calculate initial delay: 1 minute base + 0-5 minutes jitter
174+
// This protects boot phase and spreads validation across pods
175+
baseDelay := 1 * time.Minute
176+
// #nosec G404 - using math/rand for jitter is acceptable, cryptographic randomness not required
177+
jitter := time.Duration(rand.Intn(5*60)) * time.Second
178+
initialDelay := baseDelay + jitter
179+
180+
// check all backends
181+
var onlyDefaults = false
182+
go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{
183+
CheckInterval: 30 * time.Minute,
184+
InitialDelay: initialDelay,
185+
OnlyDefaults: &onlyDefaults,
186+
})
173187
}
174188

175189
// start and wait for stop signal

app/controlplane/pkg/biz/casbackend_checker.go

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,13 @@ type CASBackendChecker struct {
3939

4040
type CASBackendCheckerOpts struct {
4141
// Whether to check only default backends or all backends
42-
OnlyDefaults bool
42+
OnlyDefaults *bool
4343
// Interval between checks, defaults to 30 minutes
4444
CheckInterval time.Duration
4545
// Timeout for each individual backend validation, defaults to 10 seconds
4646
ValidationTimeout time.Duration
47+
// Initial delay before first validation (includes jitter). If not set, runs immediately.
48+
InitialDelay time.Duration
4749
}
4850

4951
// NewCASBackendChecker creates a new CAS backend checker that will periodically validate
@@ -65,28 +67,45 @@ func (c *CASBackendChecker) Start(ctx context.Context, opts *CASBackendCheckerOp
6567
}
6668

6769
onlyDefaults := true
68-
if opts != nil {
69-
onlyDefaults = opts.OnlyDefaults
70+
if opts != nil && opts.OnlyDefaults != nil {
71+
onlyDefaults = *opts.OnlyDefaults
7072
}
7173

7274
// Apply validation timeout from options if provided
7375
if opts != nil && opts.ValidationTimeout > 0 {
7476
c.validationTimeout = opts.ValidationTimeout
7577
}
7678

77-
ticker := time.NewTicker(interval)
78-
defer ticker.Stop()
79-
80-
// Run one check immediately
81-
if err := c.CheckAllBackends(ctx, onlyDefaults); err != nil {
82-
c.logger.Errorf("initial CAS backend check failed: %v", err)
79+
// Apply initial delay from options if provided
80+
var initialDelay = 0 * time.Second
81+
if opts != nil && opts.InitialDelay > 0 {
82+
initialDelay = opts.InitialDelay
8383
}
8484

85-
c.logger.Infof("CAS backend checker started with interval %s, checking %s, timeout %s",
85+
// Wait for initial delay before starting if configured
86+
c.logger.Infof("CAS backend checker will start in %s, then run every %s, checking %s, timeout %s",
87+
initialDelay,
8688
interval,
8789
conditionalString(onlyDefaults, "only default backends", "all backends"),
8890
c.validationTimeout)
8991

92+
select {
93+
case <-ctx.Done():
94+
c.logger.Info("CAS backend checker stopping due to context cancellation before initial check")
95+
return
96+
case <-time.After(initialDelay):
97+
// Continue to first check
98+
}
99+
100+
// Run first check
101+
if err := c.CheckAllBackends(ctx, onlyDefaults); err != nil {
102+
c.logger.Errorf("initial CAS backend check failed: %v", err)
103+
}
104+
105+
// Start periodic checks
106+
ticker := time.NewTicker(interval)
107+
defer ticker.Stop()
108+
90109
for {
91110
select {
92111
case <-ctx.Done():

0 commit comments

Comments
 (0)