Skip to content

Commit cc5e7e9

Browse files
committed
🐛 fix: use cluster tag key to list managed egress-only internet gateway
The API for DescribeEgressOnlyInternetGateways does not support attachment.vpc-id filter. Thus, the call will return all available eigw. Consequences: - CAPA incorrectly selects an unintended eigw for use. Leading to route creation failure since the eigw belongs to a different VPC. - CAPA incorrectly destroys all eigw of all VPCs. This is very catastrophic as it can break other workloads. This commit changes the filter to use cluster tag instead. Additional safeguard is also included to check if the eigw is truly attached the VPC.
1 parent bd8766a commit cc5e7e9

File tree

2 files changed

+78
-14
lines changed

2 files changed

+78
-14
lines changed

pkg/cloud/services/network/egress_only_gateways.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,21 +136,41 @@ func (s *Service) createEgressOnlyInternetGateway() (*types.EgressOnlyInternetGa
136136
}
137137

138138
func (s *Service) describeEgressOnlyVpcInternetGateways() ([]types.EgressOnlyInternetGateway, error) {
139+
// The API for DescribeEgressOnlyInternetGateways does not support filtering by VPC ID attachment.
140+
// More details: https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeEgressOnlyInternetGateways.html
141+
// Since the eigw is managed by CAPA, we can filter by the kubernetes cluster tag.
139142
out, err := s.EC2Client.DescribeEgressOnlyInternetGateways(context.TODO(), &ec2.DescribeEgressOnlyInternetGatewaysInput{
140143
Filters: []types.Filter{
141-
filter.EC2.VPCAttachment(s.scope.VPC().ID),
144+
filter.EC2.Cluster(s.scope.Name()),
142145
},
143146
})
144147
if err != nil {
145148
record.Eventf(s.scope.InfraCluster(), "FailedDescribeEgressOnlyInternetGateway", "Failed to describe egress only internet gateway in vpc %q: %v", s.scope.VPC().ID, err)
146149
return nil, errors.Wrapf(err, "failed to describe egress only internet gateways in vpc %q", s.scope.VPC().ID)
147150
}
148151

149-
if len(out.EgressOnlyInternetGateways) == 0 {
152+
// For safeguarding, we collect only egress-only internet gateways
153+
// that are attached to the VPC.
154+
eigws := make([]types.EgressOnlyInternetGateway, 0)
155+
for _, eigw := range out.EgressOnlyInternetGateways {
156+
for _, attachment := range eigw.Attachments {
157+
if aws.ToString(attachment.VpcId) == s.scope.VPC().ID {
158+
eigws = append(eigws, eigw)
159+
}
160+
}
161+
}
162+
163+
if len(eigws) == 0 {
150164
return nil, awserrors.NewNotFound(fmt.Sprintf("no egress only internet gateways found in vpc %q", s.scope.VPC().ID))
165+
} else if len(eigws) > 1 {
166+
eigwIDs := make([]string, len(eigws))
167+
for i, eigw := range eigws {
168+
eigwIDs[i] = aws.ToString(eigw.EgressOnlyInternetGatewayId)
169+
}
170+
return nil, awserrors.NewConflict(fmt.Sprintf("expected 1 egress only internet gateway in vpc %q, but found %v: %v", s.scope.VPC().ID, len(eigws), eigwIDs))
151171
}
152172

153-
return out.EgressOnlyInternetGateways, nil
173+
return eigws, nil
154174
}
155175

156176
func (s *Service) getEgressOnlyGatewayTagParams(id string) infrav1.BuildParams {

pkg/cloud/services/network/egress_only_gateways_test.go

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ func TestReconcileEgressOnlyInternetGateways(t *testing.T) {
4040
defer mockCtrl.Finish()
4141

4242
testCases := []struct {
43-
name string
44-
input *infrav1.NetworkSpec
45-
expect func(m *mocks.MockEC2APIMockRecorder)
43+
name string
44+
input *infrav1.NetworkSpec
45+
expect func(m *mocks.MockEC2APIMockRecorder)
46+
wantErrContaining *string
4647
}{
4748
{
4849
name: "has eigw",
@@ -75,6 +76,44 @@ func TestReconcileEgressOnlyInternetGateways(t *testing.T) {
7576
Return(nil, nil)
7677
},
7778
},
79+
{
80+
name: "has more than 1 eigw, should return error",
81+
input: &infrav1.NetworkSpec{
82+
VPC: infrav1.VPCSpec{
83+
ID: "vpc-egress-only-gateways",
84+
IPv6: &infrav1.IPv6{},
85+
Tags: infrav1.Tags{
86+
infrav1.ClusterTagKey("test-cluster"): "owned",
87+
},
88+
},
89+
},
90+
wantErrContaining: aws.String("expected 1 egress only internet gateway in vpc \"vpc-egress-only-gateways\", but found 2: [eigw-0 eigw-1]"),
91+
expect: func(m *mocks.MockEC2APIMockRecorder) {
92+
m.DescribeEgressOnlyInternetGateways(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeEgressOnlyInternetGatewaysInput{})).
93+
Return(&ec2.DescribeEgressOnlyInternetGatewaysOutput{
94+
EgressOnlyInternetGateways: []types.EgressOnlyInternetGateway{
95+
{
96+
EgressOnlyInternetGatewayId: aws.String("eigw-0"),
97+
Attachments: []types.InternetGatewayAttachment{
98+
{
99+
State: types.AttachmentStatusAttached,
100+
VpcId: aws.String("vpc-egress-only-gateways"),
101+
},
102+
},
103+
},
104+
{
105+
EgressOnlyInternetGatewayId: aws.String("eigw-1"),
106+
Attachments: []types.InternetGatewayAttachment{
107+
{
108+
State: types.AttachmentStatusAttached,
109+
VpcId: aws.String("vpc-egress-only-gateways"),
110+
},
111+
},
112+
},
113+
},
114+
}, nil)
115+
},
116+
},
78117
{
79118
name: "no eigw attached, creates one",
80119
input: &infrav1.NetworkSpec{
@@ -122,10 +161,13 @@ func TestReconcileEgressOnlyInternetGateways(t *testing.T) {
122161

123162
for _, tc := range testCases {
124163
t.Run(tc.name, func(t *testing.T) {
164+
g := NewWithT(t)
125165
ec2Mock := mocks.NewMockEC2API(mockCtrl)
126166

127167
scheme := runtime.NewScheme()
128-
_ = infrav1.AddToScheme(scheme)
168+
err := infrav1.AddToScheme(scheme)
169+
g.Expect(err).NotTo(HaveOccurred())
170+
129171
client := fake.NewClientBuilder().WithScheme(scheme).Build()
130172
scope, err := scope.NewClusterScope(scope.ClusterScopeParams{
131173
Client: client,
@@ -139,18 +181,20 @@ func TestReconcileEgressOnlyInternetGateways(t *testing.T) {
139181
},
140182
},
141183
})
142-
if err != nil {
143-
t.Fatalf("Failed to create test context: %v", err)
144-
}
184+
g.Expect(err).NotTo(HaveOccurred())
145185

146186
tc.expect(ec2Mock.EXPECT())
147187

148188
s := NewService(scope)
149189
s.EC2Client = ec2Mock
150190

151-
if err := s.reconcileEgressOnlyInternetGateways(); err != nil {
152-
t.Fatalf("got an unexpected error: %v", err)
191+
err = s.reconcileEgressOnlyInternetGateways()
192+
if tc.wantErrContaining != nil {
193+
g.Expect(err).To(HaveOccurred())
194+
g.Expect(err.Error()).To(ContainSubstring(*tc.wantErrContaining))
195+
return
153196
}
197+
g.Expect(err).NotTo(HaveOccurred())
154198
})
155199
}
156200
}
@@ -199,8 +243,8 @@ func TestDeleteEgressOnlyInternetGateways(t *testing.T) {
199243
m.DescribeEgressOnlyInternetGateways(context.TODO(), gomock.Eq(&ec2.DescribeEgressOnlyInternetGatewaysInput{
200244
Filters: []types.Filter{
201245
{
202-
Name: aws.String("attachment.vpc-id"),
203-
Values: []string{"vpc-gateways"},
246+
Name: aws.String("tag-key"),
247+
Values: []string{infrav1.ClusterTagKey("test-cluster")},
204248
},
205249
},
206250
})).Return(&ec2.DescribeEgressOnlyInternetGatewaysOutput{}, nil)

0 commit comments

Comments
 (0)