From 0b161658488486c7b243643f9fd04a6c1e3e72c7 Mon Sep 17 00:00:00 2001 From: Miguel Martinez Date: Sun, 9 Nov 2025 22:22:54 +0100 Subject: [PATCH 1/2] feat: add dual CAS backend validation with configurable delays Implement two separate background checkers to optimize validation frequency: - Default backends: every 30 minutes with 1min base + 0-5min jitter - All backends: every 24 hours without initial check Implementation: - Added InitialDelay field to CASBackendCheckerOpts for configurable delays - Added SkipFirstCheck flag to allow periodic-only validation - Changed OnlyDefaults from bool to *bool to distinguish explicit false from default true - Renamed CheckAllBackends to checkBackends (unexported) - Added toPtr helper function for pointer creation - Start two separate checker goroutines with different intervals and scopes This prevents thundering herd issues during pod startup, provides frequent validation for critical default backends, and comprehensive daily validation for all backends without excessive overhead. Signed-off-by: Miguel Martinez --- app/controlplane/cmd/main.go | 30 +++++++++- .../pkg/biz/casbackend_checker.go | 55 +++++++++++-------- 2 files changed, 59 insertions(+), 26 deletions(-) diff --git a/app/controlplane/cmd/main.go b/app/controlplane/cmd/main.go index b2ff70207..614ace014 100644 --- a/app/controlplane/cmd/main.go +++ b/app/controlplane/cmd/main.go @@ -1,5 +1,5 @@ // -// Copyright 2024 The Chainloop Authors. +// Copyright 2024-2025 The Chainloop Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ package main import ( "context" "fmt" + "math/rand" _ "net/http/pprof" "os" "time" @@ -167,9 +168,28 @@ func main() { } }() - // Start the background CAS Backend checker + // Start the background CAS Backend checker for DEFAULT backends (every 30 minutes) if app.casBackendChecker != nil { - go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{CheckInterval: 30 * time.Minute}) + // Calculate initial delay: 1 minute base + 0-5 minutes jitter + // This protects boot phase and spreads validation across pods + baseDelay := 1 * time.Minute + // #nosec G404 - using math/rand for jitter is acceptable, cryptographic randomness not required + jitter := time.Duration(rand.Intn(5*60)) * time.Second + initialDelay := baseDelay + jitter + + go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{ + CheckInterval: 30 * time.Minute, + InitialDelay: initialDelay, + OnlyDefaults: toPtr(true), + }) + + // Start the background CAS Backend checker for ALL backends (every 24 hours) + // Start around 24h mark to avoid overlap with default checker + go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{ + CheckInterval: 24 * time.Hour, + SkipFirstCheck: true, + OnlyDefaults: toPtr(false), + }) } // start and wait for stop signal @@ -178,6 +198,10 @@ func main() { } } +func toPtr[T any](v T) *T { + return &v +} + type app struct { *kratos.App // Periodic job that expires unfinished attestation processes older than a given threshold diff --git a/app/controlplane/pkg/biz/casbackend_checker.go b/app/controlplane/pkg/biz/casbackend_checker.go index bbb3ab0ce..106776fb3 100644 --- a/app/controlplane/pkg/biz/casbackend_checker.go +++ b/app/controlplane/pkg/biz/casbackend_checker.go @@ -39,11 +39,14 @@ type CASBackendChecker struct { type CASBackendCheckerOpts struct { // Whether to check only default backends or all backends - OnlyDefaults bool + OnlyDefaults *bool // Interval between checks, defaults to 30 minutes CheckInterval time.Duration // Timeout for each individual backend validation, defaults to 10 seconds ValidationTimeout time.Duration + // Initial delay before first validation (includes jitter). If not set, runs immediately. + InitialDelay time.Duration + SkipFirstCheck bool } // NewCASBackendChecker creates a new CAS backend checker that will periodically validate @@ -65,8 +68,8 @@ func (c *CASBackendChecker) Start(ctx context.Context, opts *CASBackendCheckerOp } onlyDefaults := true - if opts != nil { - onlyDefaults = opts.OnlyDefaults + if opts != nil && opts.OnlyDefaults != nil { + onlyDefaults = *opts.OnlyDefaults } // Apply validation timeout from options if provided @@ -74,18 +77,32 @@ func (c *CASBackendChecker) Start(ctx context.Context, opts *CASBackendCheckerOp c.validationTimeout = opts.ValidationTimeout } - ticker := time.NewTicker(interval) - defer ticker.Stop() + // Apply initial delay from options if provided + var initialDelay = 0 * time.Second + if opts != nil && !opts.SkipFirstCheck && opts.InitialDelay > 0 { + initialDelay = opts.InitialDelay + } + + c.logger.Infow("msg", "CAS backend checker configured", "skipFirstCheck", opts.SkipFirstCheck, "initialDelay", initialDelay, "interval", interval, "allBackends", !onlyDefaults, "timeout", c.validationTimeout) + + select { + case <-ctx.Done(): + c.logger.Info("CAS backend checker stopping due to context cancellation before initial check") + return + case <-time.After(initialDelay): + // Continue to first check + } - // Run one check immediately - if err := c.CheckAllBackends(ctx, onlyDefaults); err != nil { - c.logger.Errorf("initial CAS backend check failed: %v", err) + if opts != nil && !opts.SkipFirstCheck { + // Run first check + if err := c.checkBackends(ctx, onlyDefaults); err != nil { + c.logger.Errorf("initial CAS backend check failed: %v", err) + } } - c.logger.Infof("CAS backend checker started with interval %s, checking %s, timeout %s", - interval, - conditionalString(onlyDefaults, "only default backends", "all backends"), - c.validationTimeout) + // Start periodic checks + ticker := time.NewTicker(interval) + defer ticker.Stop() for { select { @@ -93,16 +110,16 @@ func (c *CASBackendChecker) Start(ctx context.Context, opts *CASBackendCheckerOp c.logger.Info("CAS backend checker stopping due to context cancellation") return case <-ticker.C: - if err := c.CheckAllBackends(ctx, onlyDefaults); err != nil { + if err := c.checkBackends(ctx, onlyDefaults); err != nil { c.logger.Errorf("periodic CAS backend check failed: %v", err) } } } } -// CheckAllBackends validates all CAS backends (or just default ones based on configuration) +// checkBackends validates all CAS backends (or just default ones based on configuration) // using a worker pool for parallel processing with timeouts -func (c *CASBackendChecker) CheckAllBackends(ctx context.Context, onlyDefaults bool) error { +func (c *CASBackendChecker) checkBackends(ctx context.Context, onlyDefaults bool) error { c.logger.Debug("starting CAS backend validation check") backends, err := c.casBackendRepo.ListBackends(ctx, onlyDefaults) @@ -138,11 +155,3 @@ func (c *CASBackendChecker) CheckAllBackends(ctx context.Context, onlyDefaults b c.logger.Debug("all CAS backend validations completed") return nil } - -// Helper function to return different strings based on a condition -func conditionalString(condition bool, trueStr, falseStr string) string { - if condition { - return trueStr - } - return falseStr -} From 835edb056a1e954a243291c2803c6006f86bcb0d Mon Sep 17 00:00:00 2001 From: Miguel Martinez Date: Mon, 10 Nov 2025 11:26:21 +0100 Subject: [PATCH 2/2] clear goroutines Signed-off-by: Miguel Martinez --- app/controlplane/cmd/main.go | 6 +++--- app/controlplane/pkg/biz/casbackend_checker.go | 15 ++++++--------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/app/controlplane/cmd/main.go b/app/controlplane/cmd/main.go index 614ace014..4963feb7e 100644 --- a/app/controlplane/cmd/main.go +++ b/app/controlplane/cmd/main.go @@ -186,9 +186,9 @@ func main() { // Start the background CAS Backend checker for ALL backends (every 24 hours) // Start around 24h mark to avoid overlap with default checker go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{ - CheckInterval: 24 * time.Hour, - SkipFirstCheck: true, - OnlyDefaults: toPtr(false), + CheckInterval: 24 * time.Hour, + InitialDelay: 24 * time.Hour, + OnlyDefaults: toPtr(false), }) } diff --git a/app/controlplane/pkg/biz/casbackend_checker.go b/app/controlplane/pkg/biz/casbackend_checker.go index 106776fb3..1dd5af32b 100644 --- a/app/controlplane/pkg/biz/casbackend_checker.go +++ b/app/controlplane/pkg/biz/casbackend_checker.go @@ -45,8 +45,7 @@ type CASBackendCheckerOpts struct { // Timeout for each individual backend validation, defaults to 10 seconds ValidationTimeout time.Duration // Initial delay before first validation (includes jitter). If not set, runs immediately. - InitialDelay time.Duration - SkipFirstCheck bool + InitialDelay time.Duration } // NewCASBackendChecker creates a new CAS backend checker that will periodically validate @@ -79,11 +78,11 @@ func (c *CASBackendChecker) Start(ctx context.Context, opts *CASBackendCheckerOp // Apply initial delay from options if provided var initialDelay = 0 * time.Second - if opts != nil && !opts.SkipFirstCheck && opts.InitialDelay > 0 { + if opts != nil && opts.InitialDelay > 0 { initialDelay = opts.InitialDelay } - c.logger.Infow("msg", "CAS backend checker configured", "skipFirstCheck", opts.SkipFirstCheck, "initialDelay", initialDelay, "interval", interval, "allBackends", !onlyDefaults, "timeout", c.validationTimeout) + c.logger.Infow("msg", "CAS backend checker configured", "initialDelay", initialDelay, "interval", interval, "allBackends", !onlyDefaults, "timeout", c.validationTimeout) select { case <-ctx.Done(): @@ -93,11 +92,9 @@ func (c *CASBackendChecker) Start(ctx context.Context, opts *CASBackendCheckerOp // Continue to first check } - if opts != nil && !opts.SkipFirstCheck { - // Run first check - if err := c.checkBackends(ctx, onlyDefaults); err != nil { - c.logger.Errorf("initial CAS backend check failed: %v", err) - } + // Run first check + if err := c.checkBackends(ctx, onlyDefaults); err != nil { + c.logger.Errorf("initial CAS backend check failed: %v", err) } // Start periodic checks