diff --git a/.changes/unreleased/ENHANCEMENTS-664-20251120-093600.yaml b/.changes/unreleased/ENHANCEMENTS-664-20251120-093600.yaml new file mode 100644 index 00000000..e23655ef --- /dev/null +++ b/.changes/unreleased/ENHANCEMENTS-664-20251120-093600.yaml @@ -0,0 +1,5 @@ +kind: ENHANCEMENTS +body: 'AgentPool: This change enables autoscaling of agents for speculative plan-only runs. The number of agents is determined by adding the number of pending plan-only runs to the number of workspaces with other pending runs' +time: 2025-11-20T09:36:00.142836-08:00 +custom: + PR: "664" diff --git a/Makefile b/Makefile index dd6e5294..d6ce2361 100644 --- a/Makefile +++ b/Makefile @@ -154,7 +154,7 @@ test-unit: fmt vet copywrite ## Run internal/controller tests. -timeout 5m \ -count 1 \ -v \ - -run="^Test(DoNotRequeue|RequeueAfter|RequeueOnErr|FormatOutput|FinalizerBehaviors|MatchWildcardName|ValidateTFEVersion)$$" + -run="^Test(DoNotRequeue|RequeueAfter|RequeueOnErr|FormatOutput|FinalizerBehaviors|MatchWildcardName|ValidateTFEVersion|PendingWorkspaceRuns)$$" .PHONY: test-helm test-helm: ## Run Helm chart tests. 
diff --git a/go.mod b/go.mod index eee656e4..4af45cab 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/onsi/gomega v1.36.3 github.com/prometheus/client_golang v1.22.0 github.com/stretchr/testify v1.11.1 + go.uber.org/mock v0.6.0 go.uber.org/zap v1.27.0 k8s.io/api v0.34.1 k8s.io/apimachinery v0.34.1 @@ -58,14 +59,14 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/net v0.38.0 // indirect + golang.org/x/net v0.43.0 // indirect golang.org/x/oauth2 v0.28.0 // indirect golang.org/x/sync v0.17.0 // indirect - golang.org/x/sys v0.32.0 // indirect - golang.org/x/term v0.30.0 // indirect - golang.org/x/text v0.23.0 // indirect + golang.org/x/sys v0.35.0 // indirect + golang.org/x/term v0.34.0 // indirect + golang.org/x/text v0.28.0 // indirect golang.org/x/time v0.12.0 // indirect - golang.org/x/tools v0.31.0 // indirect + golang.org/x/tools v0.36.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/protobuf v1.36.5 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect diff --git a/go.sum b/go.sum index f4503b82..df9129cd 100644 --- a/go.sum +++ b/go.sum @@ -140,6 +140,8 @@ go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= +go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= @@ -157,8 +159,8 @@ golang.org/x/net 
v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= -golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc= golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -169,22 +171,22 @@ golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= -golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= -golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= +golang.org/x/term v0.34.0/go.mod 
h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= -golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= +golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU= -golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/controller/agentpool_controller_autoscaling.go b/internal/controller/agentpool_controller_autoscaling.go index ec4252bc..7d8226cf 100644 --- a/internal/controller/agentpool_controller_autoscaling.go +++ 
b/internal/controller/agentpool_controller_autoscaling.go @@ -61,7 +61,7 @@ func matchWildcardName(wildcard string, str string) bool { } } -// pendingWorkspaceRuns returns the number of workspaces with pending runs for a given agent pool. +// pendingWorkspaceRuns returns the number of pending plan-only runs plus the number of workspaces with other pending runs for a given agent pool. // This function is compatible with HCP Terraform and TFE version v202409-1 and later. func pendingWorkspaceRuns(ctx context.Context, ap *agentPoolInstance) (int32, error) { runs := map[string]struct{}{} @@ -74,7 +74,7 @@ func pendingWorkspaceRuns(ctx context.Context, ap *agentPoolInstance) (int32, er PageNumber: initPageNumber, }, } - + planOnlyRunCount := 0 for { runsList, err := ap.tfClient.Client.Runs.ListForOrganization(ctx, ap.instance.Spec.Organization, listOpts) if err != nil { @@ -87,6 +87,11 @@ func pendingWorkspaceRuns(ctx context.Context, ap *agentPoolInstance) (int32, er awaitingUserInteractionRuns[string(run.Status)]++ continue } + // Count plan-only runs separately so agents can scale up and execute runs in parallel + if run.PlanOnly { + planOnlyRunCount++ + continue + } runs[run.Workspace.ID] = struct{}{} } if runsList.NextPage == 0 { @@ -97,8 +102,9 @@ func pendingWorkspaceRuns(ctx context.Context, ap *agentPoolInstance) (int32, er // TODO: // Add metric(s) for runs awaiting user interaction - - return int32(len(runs)), nil + totalPendingRuns := len(runs) + planOnlyRunCount + ap.log.Info("Runs", "msg", fmt.Sprintf("Workspaces: %+v Plan-only runs: %d Total pending runs: %d", runs, planOnlyRunCount, totalPendingRuns)) + return int32(totalPendingRuns), nil } // computeRequiredAgents is a legacy algorithm that is used to compute the number of agents needed. 
@@ -266,7 +272,7 @@ func (r *AgentPoolReconciler) reconcileAgentAutoscaling(ctx context.Context, ap r.Recorder.Eventf(&ap.instance, corev1.EventTypeWarning, "AutoscaleAgentPoolDeployment", "Autoscaling failed: %v", err.Error()) return err } - ap.log.Info("Reconcile Agent Autoscaling", "msg", fmt.Sprintf("%d workspaces have pending runs", requiredAgents)) + ap.log.Info("Reconcile Agent Autoscaling", "msg", fmt.Sprintf("%d agents are required", requiredAgents)) currentReplicas, err := r.getAgentDeploymentReplicas(ctx, ap) if err != nil { diff --git a/internal/controller/agentpool_controller_autoscaling_test.go b/internal/controller/agentpool_controller_autoscaling_test.go index 2bbf5ee7..10ca62a8 100644 --- a/internal/controller/agentpool_controller_autoscaling_test.go +++ b/internal/controller/agentpool_controller_autoscaling_test.go @@ -4,17 +4,25 @@ package controller import ( + "context" + "errors" "fmt" + "testing" "time" + "github.com/go-logr/logr" tfc "github.com/hashicorp/go-tfe" + "github.com/hashicorp/go-tfe/mocks" appv1alpha2 "github.com/hashicorp/hcp-terraform-operator/api/v1alpha2" "github.com/hashicorp/hcp-terraform-operator/internal/pointer" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + gomock "go.uber.org/mock/gomock" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" + k8sapierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/stretchr/testify/assert" ) var _ = Describe("Agent Pool controller", Ordered, func() { @@ -77,7 +85,7 @@ var _ = Describe("Agent Pool controller", Ordered, func() { Expect(k8sClient.Delete(ctx, instance)).To(Succeed()) Eventually(func() bool { err := k8sClient.Get(ctx, namespacedName, instance) - return errors.IsNotFound(err) + return k8sapierrors.IsNotFound(err) }).Should(BeTrue()) }) @@ -194,3 +202,146 @@ var _ = Describe("Agent Pool controller", Ordered, func() { }) }) }) + +func TestPendingWorkspaceRuns(t *testing.T) { + tests := []struct { + name string + mockRuns []*tfc.Run + mockErr error + expectedCount int32 + expectError bool + }{ + { + name: "returns error from client", + mockErr: errors.New("api error"), + expectedCount: 0, + expectError: true, + }, + { + name: "counts plan-only runs", + mockRuns: []*tfc.Run{ + {ID: "run1", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run2", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws2"}}, + }, + expectedCount: 2, + expectError: false, + }, + { + name: "skips user interaction runs", + mockRuns: []*tfc.Run{ + {ID: "run1", PlanOnly: false, Status: tfc.RunPlanned, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run2", PlanOnly: false, Status: tfc.RunPolicyOverride, Workspace: &tfc.Workspace{ID: "ws2"}}, + }, + expectedCount: 0, + expectError: false, + }, + { + name: "counts normal pending runs", + mockRuns: []*tfc.Run{ + {ID: "run1", PlanOnly: false, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run2", PlanOnly: false, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws2"}}, + }, + expectedCount: 2, + expectError: false, + }, + { + name: "mix of plan-only and normal runs", + 
mockRuns: []*tfc.Run{ + {ID: "run1", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run2", PlanOnly: false, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws2"}}, + }, + expectedCount: 2, + expectError: false, + }, + { + name: "plan-only runs for single workspace", + mockRuns: []*tfc.Run{ + {ID: "run1", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run2", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run3", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run4", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + }, + expectedCount: 4, + expectError: false, + }, + { + name: "single apply and multiple plan-only runs for single workspace", + mockRuns: []*tfc.Run{ + {ID: "run1", PlanOnly: false, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run2", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run3", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run4", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run5", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + }, + expectedCount: 5, + expectError: false, + }, + { + name: "mix of plan-only and apply runs for single workspace", + mockRuns: []*tfc.Run{ + {ID: "run1", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run2", PlanOnly: false, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run3", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run4", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run5", PlanOnly: false, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + }, + expectedCount: 4, + 
expectError: false, + }, + { + name: "mix of plan-only and apply runs for multiple workspaces", + mockRuns: []*tfc.Run{ + {ID: "run1", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run2", PlanOnly: false, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws2"}}, + {ID: "run3", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws3"}}, + {ID: "run4", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run5", PlanOnly: false, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + }, + expectedCount: 5, + expectError: false, + }, + { + name: "mix of plan-only and apply runs for two workspaces", + mockRuns: []*tfc.Run{ + {ID: "run1", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws1"}}, + {ID: "run2", PlanOnly: false, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws2"}}, + {ID: "run3", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws2"}}, + {ID: "run4", PlanOnly: true, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws2"}}, + {ID: "run5", PlanOnly: false, Status: tfc.RunPlanning, Workspace: &tfc.Workspace{ID: "ws2"}}, + }, + expectedCount: 4, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + mockRuns := mocks.NewMockRuns(ctrl) + mockRuns.EXPECT(). + ListForOrganization(gomock.Any(), "test-org", gomock.Any()). 
+ Return(&tfc.OrganizationRunList{Items: tt.mockRuns, PaginationNextPrev: &tfc.PaginationNextPrev{NextPage: 0}}, tt.mockErr) + + ap := &agentPoolInstance{ + tfClient: HCPTerraformClient{Client: &tfc.Client{Runs: mockRuns}}, + instance: appv1alpha2.AgentPool{ + Spec: appv1alpha2.AgentPoolSpec{ + Name: "test-pool", + Organization: "test-org", + }, + }, + log: logr.Logger{}, + } + + count, err := pendingWorkspaceRuns(context.Background(), ap) + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedCount, count) + } + }) + } +}