Skip to content

Commit 0b16165

Browse files
committed
feat: add dual CAS backend validation with configurable delays
Implement two separate background checkers to optimize validation frequency:

- Default backends: every 30 minutes, after an initial delay of 1min base + 0-5min jitter
- All backends: every 24 hours, without an initial check

Implementation:

- Added InitialDelay field to CASBackendCheckerOpts for configurable delays
- Added SkipFirstCheck flag to allow periodic-only validation
- Changed OnlyDefaults from bool to *bool to distinguish explicit false from default true
- Renamed CheckAllBackends to checkBackends (unexported)
- Added toPtr helper function for pointer creation
- Start two separate checker goroutines with different intervals and scopes

This prevents thundering-herd issues during pod startup, provides frequent validation for critical default backends, and provides comprehensive daily validation for all backends without excessive overhead.

Signed-off-by: Miguel Martinez <miguel@chainloop.dev>
1 parent 73d16fe commit 0b16165

File tree

2 files changed

+59
-26
lines changed

2 files changed

+59
-26
lines changed

app/controlplane/cmd/main.go

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//
2-
// Copyright 2024 The Chainloop Authors.
2+
// Copyright 2024-2025 The Chainloop Authors.
33
//
44
// Licensed under the Apache License, Version 2.0 (the "License");
55
// you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@ package main
1818
import (
1919
"context"
2020
"fmt"
21+
"math/rand"
2122
_ "net/http/pprof"
2223
"os"
2324
"time"
@@ -167,9 +168,28 @@ func main() {
167168
}
168169
}()
169170

170-
// Start the background CAS Backend checker
171+
// Start the background CAS Backend checker for DEFAULT backends (every 30 minutes)
171172
if app.casBackendChecker != nil {
172-
go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{CheckInterval: 30 * time.Minute})
173+
// Calculate initial delay: 1 minute base + 0-5 minutes jitter
174+
// This protects boot phase and spreads validation across pods
175+
baseDelay := 1 * time.Minute
176+
// #nosec G404 - using math/rand for jitter is acceptable, cryptographic randomness not required
177+
jitter := time.Duration(rand.Intn(5*60)) * time.Second
178+
initialDelay := baseDelay + jitter
179+
180+
go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{
181+
CheckInterval: 30 * time.Minute,
182+
InitialDelay: initialDelay,
183+
OnlyDefaults: toPtr(true),
184+
})
185+
186+
// Start the background CAS Backend checker for ALL backends (every 24 hours)
187+
// Skip the startup check so the first run happens on the first 24h tick, avoiding overlap with the default checker's initial run
188+
go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{
189+
CheckInterval: 24 * time.Hour,
190+
SkipFirstCheck: true,
191+
OnlyDefaults: toPtr(false),
192+
})
173193
}
174194

175195
// start and wait for stop signal
@@ -178,6 +198,10 @@ func main() {
178198
}
179199
}
180200

201+
func toPtr[T any](v T) *T {
202+
return &v
203+
}
204+
181205
type app struct {
182206
*kratos.App
183207
// Periodic job that expires unfinished attestation processes older than a given threshold

app/controlplane/pkg/biz/casbackend_checker.go

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,14 @@ type CASBackendChecker struct {
3939

4040
type CASBackendCheckerOpts struct {
4141
// Whether to check only default backends or all backends
42-
OnlyDefaults bool
42+
OnlyDefaults *bool
4343
// Interval between checks, defaults to 30 minutes
4444
CheckInterval time.Duration
4545
// Timeout for each individual backend validation, defaults to 10 seconds
4646
ValidationTimeout time.Duration
47+
// Initial delay before the first validation (includes jitter). If not set, the first check runs immediately. Ignored when SkipFirstCheck is true.
48+
InitialDelay time.Duration
49+
SkipFirstCheck bool
4750
}
4851

4952
// NewCASBackendChecker creates a new CAS backend checker that will periodically validate
@@ -65,44 +68,58 @@ func (c *CASBackendChecker) Start(ctx context.Context, opts *CASBackendCheckerOp
6568
}
6669

6770
onlyDefaults := true
68-
if opts != nil {
69-
onlyDefaults = opts.OnlyDefaults
71+
if opts != nil && opts.OnlyDefaults != nil {
72+
onlyDefaults = *opts.OnlyDefaults
7073
}
7174

7275
// Apply validation timeout from options if provided
7376
if opts != nil && opts.ValidationTimeout > 0 {
7477
c.validationTimeout = opts.ValidationTimeout
7578
}
7679

77-
ticker := time.NewTicker(interval)
78-
defer ticker.Stop()
80+
// Apply initial delay from options if provided
81+
var initialDelay = 0 * time.Second
82+
if opts != nil && !opts.SkipFirstCheck && opts.InitialDelay > 0 {
83+
initialDelay = opts.InitialDelay
84+
}
85+
86+
c.logger.Infow("msg", "CAS backend checker configured", "skipFirstCheck", opts.SkipFirstCheck, "initialDelay", initialDelay, "interval", interval, "allBackends", !onlyDefaults, "timeout", c.validationTimeout)
87+
88+
select {
89+
case <-ctx.Done():
90+
c.logger.Info("CAS backend checker stopping due to context cancellation before initial check")
91+
return
92+
case <-time.After(initialDelay):
93+
// Continue to first check
94+
}
7995

80-
// Run one check immediately
81-
if err := c.CheckAllBackends(ctx, onlyDefaults); err != nil {
82-
c.logger.Errorf("initial CAS backend check failed: %v", err)
96+
if opts != nil && !opts.SkipFirstCheck {
97+
// Run first check
98+
if err := c.checkBackends(ctx, onlyDefaults); err != nil {
99+
c.logger.Errorf("initial CAS backend check failed: %v", err)
100+
}
83101
}
84102

85-
c.logger.Infof("CAS backend checker started with interval %s, checking %s, timeout %s",
86-
interval,
87-
conditionalString(onlyDefaults, "only default backends", "all backends"),
88-
c.validationTimeout)
103+
// Start periodic checks
104+
ticker := time.NewTicker(interval)
105+
defer ticker.Stop()
89106

90107
for {
91108
select {
92109
case <-ctx.Done():
93110
c.logger.Info("CAS backend checker stopping due to context cancellation")
94111
return
95112
case <-ticker.C:
96-
if err := c.CheckAllBackends(ctx, onlyDefaults); err != nil {
113+
if err := c.checkBackends(ctx, onlyDefaults); err != nil {
97114
c.logger.Errorf("periodic CAS backend check failed: %v", err)
98115
}
99116
}
100117
}
101118
}
102119

103-
// CheckAllBackends validates all CAS backends (or just default ones based on configuration)
120+
// checkBackends validates all CAS backends (or just default ones based on configuration)
104121
// using a worker pool for parallel processing with timeouts
105-
func (c *CASBackendChecker) CheckAllBackends(ctx context.Context, onlyDefaults bool) error {
122+
func (c *CASBackendChecker) checkBackends(ctx context.Context, onlyDefaults bool) error {
106123
c.logger.Debug("starting CAS backend validation check")
107124

108125
backends, err := c.casBackendRepo.ListBackends(ctx, onlyDefaults)
@@ -138,11 +155,3 @@ func (c *CASBackendChecker) CheckAllBackends(ctx context.Context, onlyDefaults b
138155
c.logger.Debug("all CAS backend validations completed")
139156
return nil
140157
}
141-
142-
// Helper function to return different strings based on a condition
143-
func conditionalString(condition bool, trueStr, falseStr string) string {
144-
if condition {
145-
return trueStr
146-
}
147-
return falseStr
148-
}

0 commit comments

Comments
 (0)