Skip to content

Commit 0f57966

Browse files
committed
fix: directly referenced images via digest get deleted even if running
This fix adds images to the list of running SHAs if they are referenced directly by their SHA digest rather than by tag.

Original Author: Christian Erhardt <mail@mojo2k.de>

Handle Images Referenced by Digest: The updated code adds explicit support for images referenced by digest (e.g., @sha256:1234567890abcdef). Reason: In some cases, running containers may reference images directly by their digest instead of by a tag. The original code only handled image references with tags, which could lead to missed matches for digest-referenced images.

Extract the Base Repository URI: Uses re.search(r"^[^@:]+", running_image).group(0) to extract the base repository URI from running_image, excluding the tag or digest. Reason: Ensures that the base repository URI matches the current repository (repository['repositoryUri']), filtering out irrelevant images.

Fallback to Tag Lookup for Digest: If the running image is referenced by a tag (e.g., my-repo:latest), the updated code looks up the corresponding digest in the tagged_images list. Reason: Provides a fallback mechanism for identifying the digest when a tag is used, ensuring all relevant digests are captured.

Improved Error Handling: Catches cases where a tag in running_containers is not found in tagged_images and logs a message. Reason: Addresses scenarios where a running container might use an image tag that no longer exists in the repository, which would otherwise raise an error.

Key Changes and Their Purpose

1. Looping Over running_containers Instead of tagged_images. Original: Loops through tagged_images and checks if each image matches a running image. Updated: Loops through running_containers and finds corresponding digests in tagged_images. Purpose: Prioritizes the smaller list (running_containers) for iteration, reducing the number of comparisons.
1 parent 0131cc4 commit 0f57966

File tree

1 file changed

+104
-38
lines changed

1 file changed

+104
-38
lines changed

main.py

Lines changed: 104 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -49,28 +49,78 @@ def handler(event, context):
4949
discover_delete_images(REGION)
5050

5151

52-
def discover_delete_images(regionname):
53-
print("Discovering images in " + regionname)
54-
ecr_client = boto3.client('ecr', region_name=regionname)
52+
def get_running_digests_sha_old(running_containers, repository, tagged_images) -> set:
    """Return digests of tagged images whose ``<repositoryUri>:<tag>`` URL is running.

    Legacy matcher: only exact ``uri:tag`` references are recognised, so
    images referenced by digest are never reported.

    Args:
        running_containers: Collection of image reference strings.
        repository: Mapping providing the ``'repositoryUri'`` entry.
        tagged_images: Image mappings with ``'imageTags'`` and ``'imageDigest'``.

    Returns:
        Set of ``imageDigest`` values with at least one tag URL found in
        ``running_containers``.
    """
    return {
        candidate['imageDigest']
        for candidate in tagged_images
        for label in candidate['imageTags']
        # A tag counts as running when its full URL equals a running reference.
        if repository['repositoryUri'] + ":" + label in running_containers
    }
62+
63+
64+
def get_running_digests_sha(running_containers, repository, tagged_images) -> set:
    """Collect the digests of this repository's images that are in use.

    Each running image reference may take one of the forms ``<uri>``,
    ``<uri>:<tag>``, ``<uri>@<digest>`` or ``<uri>:<tag>@<digest>``.
    References whose ``<uri>`` differs from ``repository['repositoryUri']``
    are ignored.

    Args:
        running_containers: Iterable of image reference strings.
        repository: Mapping providing the ``'repositoryUri'`` entry.
        tagged_images: Image mappings with ``'imageDigest'`` and
            ``'imageTags'``; the first entry carrying a tag wins.

    Returns:
        Set of digests: taken verbatim from digest references, or looked up
        in ``tagged_images`` for tag references. Tags that cannot be
        resolved are reported on stdout and skipped.
    """
    repository_uri = repository['repositoryUri']
    running_digests_sha = set()

    for running_image in running_containers:
        if not running_image:
            # Nothing to parse; an empty reference cannot match any repository.
            continue

        # An explicit digest, when present, follows the "@" separator.
        name_part, _, digest = running_image.partition("@")

        # The tag separator is a ":" after the last "/" so that a registry
        # port ("host:5000/repo") is not mistaken for a tag.
        tag_sep = name_part.find(":", name_part.rfind("/") + 1)
        uri = name_part if tag_sep == -1 else name_part[:tag_sep]
        if uri != repository_uri:
            # The reference belongs to another repository.
            continue

        if not digest:
            # Referenced by tag - look up the digest for this tag. A reference
            # without any tag falls back to the conventional default "latest".
            tag = "latest" if tag_sep == -1 else name_part[tag_sep + 1:]
            digest = next(
                (image['imageDigest'] for image in tagged_images
                 if tag in image.get('imageTags', ())),
                None,
            )
            if digest is None:
                # A container is using an image that does not exist anymore?
                print(f"Error: Image with '{tag=}' not found in tagged images, "
                      f"Is {running_image=} is using an image that does not exist anymore? ")
                continue

        if digest:
            running_digests_sha.add(digest)

    return running_digests_sha
100+
101+
102+
def discover_delete_images(region_name: str):
103+
print("Discovering images in " + region_name)
104+
ecr_client = boto3.client('ecr', region_name=region_name)
55105

56106
repositories = []
57107
describe_repo_paginator = ecr_client.get_paginator('describe_repositories')
58-
for response_listrepopaginator in describe_repo_paginator.paginate():
59-
for repo in response_listrepopaginator['repositories']:
108+
for describe_repo_response in describe_repo_paginator.paginate():
109+
for repo in describe_repo_response['repositories']:
60110
repositories.append(repo)
61111

62-
ecs_client = boto3.client('ecs', region_name=regionname)
112+
ecs_client = boto3.client('ecs', region_name=region_name)
63113

64-
listclusters_paginator = ecs_client.get_paginator('list_clusters')
65114
running_containers = []
66-
for response_listclusterpaginator in listclusters_paginator.paginate():
67-
for cluster in response_listclusterpaginator['clusterArns']:
68-
listtasks_paginator = ecs_client.get_paginator('list_tasks')
69-
for reponse_listtaskpaginator in listtasks_paginator.paginate(cluster=cluster, desiredStatus='RUNNING'):
70-
if reponse_listtaskpaginator['taskArns']:
115+
list_clusters_paginator = ecs_client.get_paginator('list_clusters')
116+
for response_clusters_list_paginator in list_clusters_paginator.paginate():
117+
for cluster in response_clusters_list_paginator['clusterArns']:
118+
list_tasks_paginator = ecs_client.get_paginator('list_tasks')
119+
for list_tasks_response in list_tasks_paginator.paginate(cluster=cluster, desiredStatus='RUNNING'):
120+
if list_tasks_response['taskArns']:
71121
describe_tasks_list = ecs_client.describe_tasks(
72122
cluster=cluster,
73-
tasks=reponse_listtaskpaginator['taskArns']
123+
tasks=list_tasks_response['taskArns']
74124
)
75125

76126
for tasks_list in describe_tasks_list['tasks']:
@@ -83,58 +133,74 @@ def discover_delete_images(regionname):
83133
if container['image'] not in running_containers:
84134
running_containers.append(container['image'])
85135

136+
# example of `image`
137+
# {
138+
# "registryId": "123456789012",
139+
# "repositoryName": "my-repo",
140+
# "imageDigest": "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890",
141+
# "imageTags": ["latest", "v1.0.0"],
142+
# "imagePushedAt": "2025-01-01T12:00:00Z",
143+
# "imageSizeInBytes": 12345678,
144+
# "lastRecordedPullTime": "2025-01-10T15:30:00Z",
145+
# "artifactMediaType": "application/vnd.docker.container.image.v1+json"
146+
# }
147+
# Explanation of Fields
148+
# registryId: The AWS account ID associated with the image.
149+
# repositoryName: The name of the ECR repository where the image is stored.
150+
# imageDigest: A unique identifier for the image, derived from the image's contents (SHA-256 hash).
151+
# imageTags: A list of tags associated with the image, e.g., "latest", "v1.0.0".
152+
# If the image is untagged, this field is absent.
153+
# imagePushedAt: The timestamp of when the image was pushed to the repository.
154+
# imageSizeInBytes: The size of the image in bytes.
155+
# lastRecordedPullTime: The timestamp of the last time the image was pulled from the repository.
156+
# This field may be null if the image has never been pulled.
157+
# artifactMediaType: The media type of the image artifact.
158+
86159
print("Images that are running:")
87160
for image in running_containers:
88161
print(image)
89162

90163
for repository in repositories:
91164
print("------------------------")
92165
print("Starting with repository :" + repository['repositoryUri'])
93-
deletesha = []
94-
deletetag = []
166+
delete_sha = []
167+
delete_tag = []
95168
tagged_images = []
96169

97-
describeimage_paginator = ecr_client.get_paginator('describe_images')
98-
for response_describeimagepaginator in describeimage_paginator.paginate(
170+
describe_image_paginator = ecr_client.get_paginator('describe_images')
171+
for describe_image_response in describe_image_paginator.paginate(
99172
registryId=repository['registryId'],
100173
repositoryName=repository['repositoryName']):
101-
for image in response_describeimagepaginator['imageDetails']:
174+
for image in describe_image_response['imageDetails']:
102175
if 'imageTags' in image:
103176
tagged_images.append(image)
104177
else:
105-
append_to_list(deletesha, image['imageDigest'])
178+
append_to_list(delete_sha, image['imageDigest'])
106179

107-
print("Total number of images found: {}".format(len(tagged_images) + len(deletesha)))
108-
print("Number of untagged images found {}".format(len(deletesha)))
180+
print("Total number of images found: {}".format(len(tagged_images) + len(delete_sha)))
181+
print("Number of untagged images found {}".format(len(delete_sha)))
109182

110183
tagged_images.sort(key=lambda k: k['imagePushedAt'], reverse=True)
111184

112185
# Get ImageDigest from ImageURL for running images. Do this for every repository
113-
running_sha = []
114-
for image in tagged_images:
115-
for tag in image['imageTags']:
116-
imageurl = repository['repositoryUri'] + ":" + tag
117-
for runningimages in running_containers:
118-
if imageurl == runningimages:
119-
if imageurl not in running_sha:
120-
running_sha.append(image['imageDigest'])
121-
122-
print("Number of running images found {}".format(len(running_sha)))
186+
running_digests_sha = get_running_digests_sha(running_containers, repository, tagged_images)
187+
188+
print("Number of running images found {}".format(len(running_digests_sha)))
123189
ignore_tags_regex = re.compile(IGNORE_TAGS_REGEX)
124190
for image in tagged_images:
125191
if tagged_images.index(image) >= IMAGES_TO_KEEP:
126192
for tag in image['imageTags']:
127193
if "latest" not in tag and ignore_tags_regex.search(tag) is None:
128-
if not running_sha or image['imageDigest'] not in running_sha:
129-
append_to_list(deletesha, image['imageDigest'])
130-
append_to_tag_list(deletetag, {"imageUrl": repository['repositoryUri'] + ":" + tag,
194+
if not running_digests_sha or image['imageDigest'] not in running_digests_sha:
195+
append_to_list(delete_sha, image['imageDigest'])
196+
append_to_tag_list(delete_tag, {"imageUrl": repository['repositoryUri'] + ":" + tag,
131197
"pushedAt": image["imagePushedAt"]})
132-
if deletesha:
133-
print("Number of images to be deleted: {}".format(len(deletesha)))
198+
if delete_sha:
199+
print("Number of images to be deleted: {}".format(len(delete_sha)))
134200
delete_images(
135201
ecr_client,
136-
deletesha,
137-
deletetag,
202+
delete_sha,
203+
delete_tag,
138204
repository['registryId'],
139205
repository['repositoryName']
140206
)

0 commit comments

Comments
 (0)