@@ -29,6 +29,7 @@ import (
2929 "time"
3030
3131 "github.com/aws/aws-sdk-go-v2/aws"
32+ "github.com/aws/aws-sdk-go-v2/service/ec2"
3233 ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
3334
3435 elb "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing"
@@ -1082,24 +1083,24 @@ func (c *Cloud) ensureLoadBalancer(ctx context.Context, namespacedName types.Nam
10821083
10831084 {
10841085 // Sync security groups
1085- expected := sets .New [ string ] (securityGroupIDs ... )
1086- actual := stringSetFromList (loadBalancer .SecurityGroups )
1086+ expected := sets .New (securityGroupIDs ... )
1087+ actual := sets . New (loadBalancer .SecurityGroups ... )
10871088
10881089 if ! expected .Equal (actual ) {
10891090 // This call just replaces the security groups, unlike e.g. subnets (!)
1090- request := & elb.ApplySecurityGroupsToLoadBalancerInput {}
1091- request .LoadBalancerName = aws .String (loadBalancerName )
1092- if securityGroupIDs == nil {
1093- request .SecurityGroups = nil
1094- } else {
1095- request .SecurityGroups = securityGroupIDs
1096- }
1097- klog .V (2 ).Info ("Applying updated security groups to load balancer" )
1098- _ , err := c .elb .ApplySecurityGroupsToLoadBalancer (ctx , request )
1099- if err != nil {
1091+ klog .V (2 ).Infof ("Applying updated security groups to load balancer %q" , loadBalancerName )
1092+ if _ , err := c .elb .ApplySecurityGroupsToLoadBalancer (ctx , & elb.ApplySecurityGroupsToLoadBalancerInput {
1093+ LoadBalancerName : aws .String (loadBalancerName ),
1094+ SecurityGroups : securityGroupIDs ,
1095+ }); err != nil {
11001096 return nil , fmt .Errorf ("error applying AWS loadbalancer security groups: %q" , err )
11011097 }
11021098 dirty = true
1099+
1100+ // Ensure the replaced security groups are removed from AWS when owned by the controller.
1101+ if errs := c .removeOwnedSecurityGroups (ctx , loadBalancerName , actual .UnsortedList ()); len (errs ) > 0 {
1102+ return nil , fmt .Errorf ("error removing owned security groups: %v" , errs )
1103+ }
11031104 }
11041105 }
11051106
@@ -1704,3 +1705,143 @@ func ValidateHealthCheck(s *elbtypes.HealthCheck) error {
17041705
17051706 return nil
17061707}
1708+
1709+ // isOwnedSecurityGroup checks if the security group is owned by the controller
1710+ // by checking if the security group has the cluster ownership tag
1711+ // (kubernetes.io/cluster/<clusterID>=owned).
1712+ //
1713+ // Parameters:
1714+ // - `ctx`: The context for the operation.
1715+ // - `securityGroupID`: The ID of the security group to check.
1716+ //
1717+ // Returns:
1718+ // - `bool`: True if the security group is owned by the controller, false otherwise.
1719+ // - `error`: An error if the security group cannot be retrieved, is not found,
1720+ // or if multiple security groups are found with the same ID (unexpected).
1721+ func (c * Cloud ) isOwnedSecurityGroup (ctx context.Context , securityGroupID string ) (bool , error ) {
1722+ groups , err := c .ec2 .DescribeSecurityGroups (ctx , & ec2.DescribeSecurityGroupsInput {
1723+ GroupIds : []string {securityGroupID },
1724+ })
1725+ if err != nil {
1726+ return false , fmt .Errorf ("error retrieving security group %q: %w" , securityGroupID , err )
1727+ }
1728+ if len (groups ) == 0 {
1729+ return false , fmt .Errorf ("security group %q not found" , securityGroupID )
1730+ }
1731+ if len (groups ) != 1 {
1732+ // This should not be possible - ids should be unique
1733+ return false , fmt .Errorf ("[BUG] multiple security groups(%d) found with same id %q" , len (groups ), securityGroupID )
1734+ }
1735+ return c .tagging .hasClusterTagOwned (groups [0 ].Tags )
1736+ }
1737+
1738+ // buildSecurityGroupRuleReferences finds all security groups that have ingress rules
1739+ // referencing the specified security group ID, and categorizes them based on cluster tagging.
1740+ // This is used to identify dependencies before removing a security group.
1741+ //
1742+ // Parameters:
1743+ // - ctx: The context for the request.
1744+ // - sgID: The ID of the security group to find references for.
1745+ //
1746+ // Returns:
1747+ // - map[*ec2types.SecurityGroup]bool: All security groups with ingress rules referencing sgID, mapped to their cluster tag status (true/false).
1748+ // - map[*ec2types.SecurityGroup]IPPermissionSet: Only cluster-tagged security groups mapped to their ingress rules that reference sgID.
1749+ // - error: An error if the AWS DescribeSecurityGroups API call fails.
1750+ func (c * Cloud ) buildSecurityGroupRuleReferences (ctx context.Context , sgID string ) (map [* ec2types.SecurityGroup ]bool , map [* ec2types.SecurityGroup ]IPPermissionSet , error ) {
1751+ groupsHasTags := make (map [* ec2types.SecurityGroup ]bool )
1752+ groupsLinkedPermissions := make (map [* ec2types.SecurityGroup ]IPPermissionSet )
1753+ sgsOut , err := c .ec2 .DescribeSecurityGroups (ctx , & ec2.DescribeSecurityGroupsInput {
1754+ Filters : []ec2types.Filter {
1755+ newEc2Filter ("ip-permission.group-id" , sgID ),
1756+ },
1757+ })
1758+ if err != nil {
1759+ return groupsHasTags , groupsLinkedPermissions , fmt .Errorf ("error querying security groups for ELB: %q" , err )
1760+ }
1761+
1762+ for _ , sg := range sgsOut {
1763+ groupsHasTags [& sg ] = c .tagging .hasClusterTag (sg .Tags )
1764+
1765+ groupsLinkedPermissions [& sg ] = NewIPPermissionSet ()
1766+ for _ , rule := range sg .IpPermissions {
1767+ if rule .UserIdGroupPairs != nil {
1768+ for _ , pair := range rule .UserIdGroupPairs {
1769+ if pair .GroupId != nil && aws .ToString (pair .GroupId ) == sgID {
1770+ groupsLinkedPermissions [& sg ].Insert (rule )
1771+ }
1772+ }
1773+ }
1774+ }
1775+
1776+ }
1777+ return groupsHasTags , groupsLinkedPermissions , nil
1778+ }
1779+
1780+ // removeOwnedSecurityGroups removes the CLB owned/managed security groups from AWS.
1781+ // It revokes ingress rules that reference the security groups to be removed,
1782+ // then deletes the security groups that are owned by the controller.
1783+ // This is used when updating load balancer security groups to clean up orphaned ones.
1784+ //
1785+ // Parameters:
1786+ // - `ctx`: The context for the operation.
1787+ // - `loadBalancerName`: The name of the load balancer (used for logging and deletion operations).
1788+ // - `securityGroups`: The list of security group IDs to process for removal.
1789+ //
1790+ // Returns:
1791+ // - `[]error`: Collection of all errors encountered during the removal process.
1792+ func (c * Cloud ) removeOwnedSecurityGroups (ctx context.Context , loadBalancerName string , securityGroups []string ) []error {
1793+ allErrs := []error {}
1794+ sgMap := make (map [string ]struct {})
1795+
1796+ // Validate each security group references building a reading list to be deleted.
1797+ for _ , sg := range securityGroups {
1798+ isOwned , err := c .isOwnedSecurityGroup (ctx , sg )
1799+ if err != nil {
1800+ allErrs = append (allErrs , fmt .Errorf ("unable to validate if security group %q is owned by the controller: %w" , sg , err ))
1801+ continue
1802+ }
1803+
1804+ groupsWithClusterTag , groupsLinkedPermissions , err := c .buildSecurityGroupRuleReferences (ctx , sg )
1805+ if err != nil {
1806+ allErrs = append (allErrs , fmt .Errorf ("error building security group rule references for %q: %w" , sg , err ))
1807+ continue
1808+ }
1809+
1810+ // Revoke ingress rules referencing the security group to be deleted
1811+ // from cluster-tagged security groups, when the referenced security
1812+ // group has no cluster tag, skip the revoke assuming it is user-managed.
1813+ for sgTarget , sgPerms := range groupsLinkedPermissions {
1814+ if ! groupsWithClusterTag [sgTarget ] {
1815+ klog .Warningf ("security group %q has no cluster tag, skipping remove lifecycle after update" , sg )
1816+ continue
1817+ }
1818+
1819+ klog .Infof ("revoking security group ingress references of %q from %q" , sg , aws .ToString (sgTarget .GroupId ))
1820+ if _ , err := c .ec2 .RevokeSecurityGroupIngress (ctx , & ec2.RevokeSecurityGroupIngressInput {
1821+ GroupId : sgTarget .GroupId ,
1822+ IpPermissions : sgPerms .List (),
1823+ }); err != nil {
1824+ allErrs = append (allErrs , fmt .Errorf ("error revoking security group ingress rules from %q: %w" , aws .ToString (sgTarget .GroupId ), err ))
1825+ continue
1826+ }
1827+ }
1828+
1829+ // Skip security group removal when the security group is not owned by the controller.
1830+ if ! isOwned {
1831+ klog .Warningf ("security group %q is not owned by the controller, skipping remove lifecycle after update" , sg )
1832+ continue
1833+ }
1834+
1835+ klog .Infof ("making loadbalancer owned security group %q ready for deletion" , sg )
1836+ sgMap [sg ] = struct {}{}
1837+ }
1838+ if len (sgMap ) == 0 {
1839+ return allErrs
1840+ }
1841+
1842+ if err := c .deleteSecurityGroupsWithBackoff (ctx , loadBalancerName , sgMap ); err != nil {
1843+ return append (allErrs , fmt .Errorf ("error deleting security groups %v: %v" , sgMap , err ))
1844+ }
1845+ klog .Infof ("loadbalancer owned security groups deleted!" )
1846+ return nil
1847+ }
0 commit comments