Skip to content

Commit 6cb7eb3

Browse files
authored
🚀 Add deletionPolicy to Agent Pool controller (#584)
1 parent dcc8862 commit 6cb7eb3

12 files changed

+329
-23
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
kind: BREAKING CHANGES
2+
body: '`AgentPool`: The new field, `spec.deletionPolicy`, is set to `retain` by default, which changes the previous default controller behavior when resources are deleted. The previous behavior corresponded to the `destroy` deletion policy value. This change is considered safer in cases of accidental resource deletion, planned migration, or other scenarios involving the deletion of a custom resource.'
3+
time: 2025-04-15T12:48:06.601466+02:00
4+
custom:
5+
PR: "584"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
kind: ENHANCEMENTS
2+
body: '`AgentPool`: Add a new field, `spec.deletionPolicy`, that specifies the behavior of the custom resource and its associated agent pool when the custom resource is deleted.'
3+
time: 2025-04-15T12:49:14.665714+02:00
4+
custom:
5+
PR: "584"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
kind: NOTES
2+
body: The `AgentPool` CRD has been changed. Please follow the Helm chart instructions on how to upgrade it.
3+
time: 2025-04-15T12:50:14.615385+02:00
4+
custom:
5+
PR: "584"

api/v1alpha2/agentpool_types.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@ import (
88
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
99
)
1010

11+
// AgentPoolDeletionPolicy defines the strategy the Kubernetes operator uses when you delete an AgentPool resource, either manually or by a system event.
12+
// You must use one of the following values:
13+
// - `retain`: When you delete the custom resource, the operator does not delete the agent pool.
14+
// - `destroy`: The operator will attempt to remove the managed HCP Terraform agent pool.
15+
type AgentPoolDeletionPolicy string
16+
17+
const (
18+
AgentPoolDeletionPolicyRetain AgentPoolDeletionPolicy = "retain"
19+
AgentPoolDeletionPolicyDestroy AgentPoolDeletionPolicy = "destroy"
20+
)
21+
1122
// Agent Token is a secret token that an HCP Terraform Agent uses to connect to the HCP Terraform Agent Pool.
1223
// In `spec` only the field `Name` is allowed, the rest are used in `status`.
1324
// More information:
@@ -133,6 +144,19 @@ type AgentPoolSpec struct {
133144
// Agent deployment autoscaling settings
134145
//+optional
135146
AgentDeploymentAutoscaling *AgentDeploymentAutoscaling `json:"autoscaling,omitempty"`
147+
148+
// The Deletion Policy specifies the behavior of the custom resource and its associated agent pool when the custom resource is deleted.
149+
// - `retain`: When you delete the custom resource, the operator will remove only the custom resource.
150+
// The HCP Terraform agent pool will be retained. The managed tokens will remain active on the HCP Terraform side; however, the corresponding secrets and managed agents will be removed.
151+
// - `destroy`: The operator will attempt to remove the managed HCP Terraform agent pool.
152+
// On success, the managed agents and the corresponding secret with tokens will be removed along with the custom resource.
153+
// On failure, the managed agents will be scaled down to 0, and the managed tokens, along with the corresponding secret, will be removed. The operator will continue attempting to remove the agent pool until it succeeds.
154+
// Default: `retain`.
155+
//
156+
//+kubebuilder:validation:Enum:=retain;destroy
157+
//+kubebuilder:default=retain
158+
//+optional
159+
DeletionPolicy AgentPoolDeletionPolicy `json:"deletionPolicy,omitempty"`
136160
}
137161

138162
// AgentDeploymentAutoscalingStatus

charts/hcp-terraform-operator/crds/app.terraform.io_agentpools.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8170,6 +8170,20 @@ spec:
81708170
- maxReplicas
81718171
- minReplicas
81728172
type: object
8173+
deletionPolicy:
8174+
default: retain
8175+
description: |-
8176+
The Deletion Policy specifies the behavior of the custom resource and its associated agent pool when the custom resource is deleted.
8177+
- `retain`: When you delete the custom resource, the operator will remove only the custom resource.
8178+
The HCP Terraform agent pool will be retained. The managed tokens will remain active on the HCP Terraform side; however, the corresponding secrets and managed agents will be removed.
8179+
- `destroy`: The operator will attempt to remove the managed HCP Terraform agent pool.
8180+
On success, the managed agents and the corresponding secret with tokens will be removed along with the custom resource.
8181+
On failure, the managed agents will be scaled down to 0, and the managed tokens, along with the corresponding secret, will be removed. The operator will continue attempting to remove the agent pool until it succeeds.
8182+
Default: `retain`.
8183+
enum:
8184+
- retain
8185+
- destroy
8186+
type: string
81738187
name:
81748188
description: |-
81758189
Agent Pool name.

config/crd/bases/app.terraform.io_agentpools.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8167,6 +8167,20 @@ spec:
81678167
- maxReplicas
81688168
- minReplicas
81698169
type: object
8170+
deletionPolicy:
8171+
default: retain
8172+
description: |-
8173+
The Deletion Policy specifies the behavior of the custom resource and its associated agent pool when the custom resource is deleted.
8174+
- `retain`: When you delete the custom resource, the operator will remove only the custom resource.
8175+
The HCP Terraform agent pool will be retained. The managed tokens will remain active on the HCP Terraform side; however, the corresponding secrets and managed agents will be removed.
8176+
- `destroy`: The operator will attempt to remove the managed HCP Terraform agent pool.
8177+
On success, the managed agents and the corresponding secret with tokens will be removed along with the custom resource.
8178+
On failure, the managed agents will be scaled down to 0, and the managed tokens, along with the corresponding secret, will be removed. The operator will continue attempting to remove the agent pool until it succeeds.
8179+
Default: `retain`.
8180+
enum:
8181+
- retain
8182+
- destroy
8183+
type: string
81708184
name:
81718185
description: |-
81728186
Agent Pool name.

docs/api-reference.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,20 @@ More infromation:
104104
| `spec` _[AgentPoolSpec](#agentpoolspec)_ | |
105105

106106

107+
#### AgentPoolDeletionPolicy
108+
109+
_Underlying type:_ _string_
110+
111+
DeletionPolicy defines the strategy the Kubernetes operator uses when you delete a resource, either manually or by a system event.
112+
You must use one of the following values:
113+
- `retain`: When you delete the custom resource, the operator does not delete the agent pool.
114+
- `destroy`: The operator will attempt to remove the managed HCP Terraform agent pool.
115+
116+
_Appears in:_
117+
- [AgentPoolSpec](#agentpoolspec)
118+
119+
120+
107121
#### AgentPoolSpec
108122

109123

@@ -121,6 +135,7 @@ _Appears in:_
121135
| `agentTokens` _[AgentToken](#agenttoken) array_ | List of the agent tokens to generate. |
122136
| `agentDeployment` _[AgentDeployment](#agentdeployment)_ | Agent deployment settings |
123137
| `autoscaling` _[AgentDeploymentAutoscaling](#agentdeploymentautoscaling)_ | Agent deployment settings |
138+
| `deletionPolicy` _[AgentPoolDeletionPolicy](#agentpooldeletionpolicy)_ | The Deletion Policy specifies the behavior of the custom resource and its associated agent pool when the custom resource is deleted.<br />- `retain`: When you delete the custom resource, the operator will remove only the custom resource.<br /> The HCP Terraform agent pool will be retained. The managed tokens will remain active on the HCP Terraform side; however, the corresponding secrets and managed agents will be removed.<br />- `destroy`: The operator will attempt to remove the managed HCP Terraform agent pool.<br /> On success, the managed agents and the corresponding secret with tokens will be removed along with the custom resource.<br /> On failure, the managed agents will be scaled down to 0, and the managed tokens, along with the corresponding secret, will be removed. The operator will continue attempting to remove the agent pool until it succeeds.<br />Default: `retain`. |
124139

125140

126141

internal/controller/agentpool_controller.go

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -212,28 +212,6 @@ func (r *AgentPoolReconciler) updateAgentPool(ctx context.Context, ap *agentPool
212212
return ap.tfClient.Client.AgentPools.Update(ctx, ap.instance.Status.AgentPoolID, options)
213213
}
214214

215-
func (r *AgentPoolReconciler) deleteAgentPool(ctx context.Context, ap *agentPoolInstance) error {
216-
if ap.instance.Status.AgentPoolID == "" {
217-
ap.log.Info("Reconcile Agent Pool", "msg", fmt.Sprintf("status.agentPoolID is empty, remove finalizer %s", agentPoolFinalizer))
218-
return r.removeFinalizer(ctx, ap)
219-
}
220-
err := ap.tfClient.Client.AgentPools.Delete(ctx, ap.instance.Status.AgentPoolID)
221-
if err != nil {
222-
// if agent pool wasn't found, it means it was deleted from the TF Cloud bypass the operator
223-
// in this case, remove the finalizer and let Kubernetes remove the object permanently
224-
if err == tfc.ErrResourceNotFound {
225-
ap.log.Info("Reconcile Agent Pool", "msg", fmt.Sprintf("Agent Pool ID %s not found, remove finalizer", agentPoolFinalizer))
226-
return r.removeFinalizer(ctx, ap)
227-
}
228-
ap.log.Error(err, "Reconcile Agent Pool", "msg", fmt.Sprintf("failed to delete Agent Pool ID %s, retry later", agentPoolFinalizer))
229-
r.Recorder.Eventf(&ap.instance, corev1.EventTypeWarning, "ReconcileAgentPool", "Failed to delete Agent Pool ID %s, retry later", ap.instance.Status.AgentPoolID)
230-
return err
231-
}
232-
233-
ap.log.Info("Reconcile Agent Pool", "msg", fmt.Sprintf("agent pool ID %s has been deleted, remove finalizer", ap.instance.Status.AgentPoolID))
234-
return r.removeFinalizer(ctx, ap)
235-
}
236-
237215
func (r *AgentPoolReconciler) readAgentPool(ctx context.Context, ap *agentPoolInstance) (*tfc.AgentPool, error) {
238216
return ap.tfClient.Client.AgentPools.ReadWithOptions(ctx, ap.instance.Status.AgentPoolID, &tfc.AgentPoolReadOptions{
239217
Include: []tfc.AgentPoolIncludeOpt{

internal/controller/agentpool_controller_autoscaling_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
. "github.com/onsi/ginkgo/v2"
1414
. "github.com/onsi/gomega"
1515
corev1 "k8s.io/api/core/v1"
16+
"k8s.io/apimachinery/pkg/api/errors"
1617
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1718
)
1819

@@ -72,7 +73,12 @@ var _ = Describe("Agent Pool controller", Ordered, func() {
7273

7374
AfterEach(func() {
7475
Expect(tfClient.Workspaces.Delete(ctx, organization, workspace)).To(Succeed())
76+
// Delete Agent Pool CR
7577
Expect(k8sClient.Delete(ctx, instance)).To(Succeed())
78+
Eventually(func() bool {
79+
err := k8sClient.Get(ctx, namespacedName, instance)
80+
return errors.IsNotFound(err)
81+
}).Should(BeTrue())
7682
})
7783

7884
Context("Autoscaling", func() {
@@ -100,6 +106,7 @@ var _ = Describe("Agent Pool controller", Ordered, func() {
100106
return run.Status == tfc.RunApplied
101107
}).Should(BeTrue())
102108
// Create a new Agent Pool
109+
instance.Spec.DeletionPolicy = appv1alpha2.AgentPoolDeletionPolicyDestroy
103110
Expect(k8sClient.Create(ctx, instance)).Should(Succeed())
104111
Eventually(func() bool {
105112
Expect(k8sClient.Get(ctx, namespacedName, instance)).Should(Succeed())
@@ -154,6 +161,7 @@ var _ = Describe("Agent Pool controller", Ordered, func() {
154161
return run.Status == tfc.RunApplied
155162
}).Should(BeTrue())
156163
// New Agent Pool
164+
instance.Spec.DeletionPolicy = appv1alpha2.AgentPoolDeletionPolicyDestroy
157165
Expect(k8sClient.Create(ctx, instance)).Should(Succeed())
158166
Eventually(func() bool {
159167
Expect(k8sClient.Get(ctx, namespacedName, instance)).Should(Succeed())
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Copyright (c) HashiCorp, Inc.
2+
// SPDX-License-Identifier: MPL-2.0
3+
4+
package controller
5+
6+
import (
7+
"context"
8+
"fmt"
9+
"time"
10+
11+
tfc "github.com/hashicorp/go-tfe"
12+
corev1 "k8s.io/api/core/v1"
13+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
14+
15+
appv1alpha2 "github.com/hashicorp/hcp-terraform-operator/api/v1alpha2"
16+
)
17+
18+
func (r *AgentPoolReconciler) deleteAgentPool(ctx context.Context, ap *agentPoolInstance) error {
19+
ap.log.Info("Reconcile Agent Pool", "msg", fmt.Sprintf("deletion policy is %s", ap.instance.Spec.DeletionPolicy))
20+
21+
if ap.instance.Status.AgentPoolID == "" {
22+
ap.log.Info("Reconcile Agent Pool", "msg", fmt.Sprintf("status.agentPoolID is empty, remove finalizer %s", agentPoolFinalizer))
23+
return r.removeFinalizer(ctx, ap)
24+
}
25+
26+
switch ap.instance.Spec.DeletionPolicy {
27+
case appv1alpha2.AgentPoolDeletionPolicyRetain:
28+
ap.log.Info("Reconcile Agent Pool", "msg", fmt.Sprintf("remove finalizer %s", agentPoolFinalizer))
29+
return r.removeFinalizer(ctx, ap)
30+
case appv1alpha2.AgentPoolDeletionPolicy(appv1alpha2.DeletionPolicyDestroy):
31+
// Attempt to delete the agent pool first. If successful, no other actions are required.
32+
// Otherwise, scale down the agents to 0 and delete all tokens.
33+
err := ap.tfClient.Client.AgentPools.Delete(ctx, ap.instance.Status.AgentPoolID)
34+
if err != nil {
35+
// If agent pool wasn't found, it means it was deleted from the HCP Terraform bypass the operator.
36+
// In this case, remove the finalizer and let Kubernetes remove the object permanently
37+
if err == tfc.ErrResourceNotFound {
38+
ap.log.Info("Reconcile Agent Pool", "msg", fmt.Sprintf("agent pool ID %s not found, remove finalizer", agentPoolFinalizer))
39+
return r.removeFinalizer(ctx, ap)
40+
}
41+
ap.log.Error(err, "Reconcile Agent Pool", "msg", fmt.Sprintf("failed to delete Agent Pool ID %s, retry later", agentPoolFinalizer))
42+
r.Recorder.Eventf(&ap.instance, corev1.EventTypeWarning, "ReconcileAgentPool", "Failed to delete Agent Pool ID %s, retry later", ap.instance.Status.AgentPoolID)
43+
// Do not return the error here; proceed further to cale down the agents to 0 and delete all tokens.
44+
} else {
45+
ap.log.Info("Reconcile Agent Pool", "msg", fmt.Sprintf("agent pool ID %s has been deleted, remove finalizer", ap.instance.Status.AgentPoolID))
46+
return r.removeFinalizer(ctx, ap)
47+
}
48+
// Downscale agents
49+
if ap.instance.Status.AgentDeploymentAutoscalingStatus != nil && ap.instance.Status.AgentDeploymentAutoscalingStatus.DesiredReplicas != nil {
50+
if *ap.instance.Status.AgentDeploymentAutoscalingStatus.DesiredReplicas > 0 {
51+
ap.log.Info("Reconcile Agent Pool", "msg", fmt.Sprintf("scale agents from %d to 0", *ap.instance.Status.AgentDeploymentAutoscalingStatus.DesiredReplicas))
52+
var n int32 = 0
53+
if err := r.scaleAgentDeployment(ctx, ap, &n); err != nil {
54+
ap.log.Error(err, "Reconcile Agent Pool", "msg", "failed to scale agents")
55+
return err
56+
}
57+
ap.instance.Status.AgentDeploymentAutoscalingStatus = &appv1alpha2.AgentDeploymentAutoscalingStatus{
58+
DesiredReplicas: &n,
59+
LastScalingEvent: &metav1.Time{
60+
Time: time.Now(),
61+
},
62+
}
63+
ap.log.Info("Reconcile Agent Pool", "msg", "successfully scaled agents to 0")
64+
}
65+
}
66+
// Remove tokens
67+
if len(ap.instance.Status.AgentTokens) > 0 {
68+
ap.log.Info("Reconcile Agent Pool", "msg", "remove tokens")
69+
for _, t := range ap.instance.Status.AgentTokens {
70+
err := ap.tfClient.Client.AgentTokens.Delete(ctx, t.ID)
71+
if err != nil && err != tfc.ErrResourceNotFound {
72+
ap.log.Error(err, "Reconcile Agent Pool", "msg", fmt.Sprintf("failed to remove token %s", t.ID))
73+
return err
74+
}
75+
err = r.removeToken(ctx, ap, t.ID)
76+
if err != nil {
77+
return err
78+
}
79+
}
80+
ap.log.Info("Reconcile Agent Pool", "msg", "successfully deleted tokens")
81+
}
82+
}
83+
84+
return nil
85+
}

0 commit comments

Comments
 (0)