@@ -48,6 +48,8 @@ SECRET_PREFIX="{{.SecretPrefix}}"
4848CHUNKS="{{.Chunks}}"
4949FILE="/etc/secret-userdata.txt"
# Index of the last chunk; the loops below iterate from 0 to FINAL_INDEX.
5050FINAL_INDEX=$((CHUNKS - 1))
# Upper bound on the number of attempts the retry_* helpers make per chunk.
51+ MAX_RETRIES=10
# Pause between two consecutive attempts in the retry_* helpers.
52+ RETRY_DELAY=10 # in seconds
5153
5254# Log an error and exit.
5355# Args:
@@ -115,6 +117,7 @@ check_aws_command() {
115117 ;;
116118 esac
117119}
120+
118121delete_secret_value() {
119122 local id="${SECRET_PREFIX}-${1}"
120123 local out
@@ -126,19 +129,27 @@ delete_secret_value() {
126129 aws secretsmanager ${ENDPOINT} --region ${REGION} delete-secret --force-delete-without-recovery --secret-id "${id}" 2>&1
127130 )
128131 local delete_return=$?
129- set -o errexit
130- set -o nounset
131- set -o pipefail
132132 check_aws_command "SecretsManager::DeleteSecret" "${delete_return}" "${out}"
133133 if [ ${delete_return} -ne 0 ]; then
134- log::error_exit "Could not delete secret value" 2
134+ log::error "Could not delete secret value"
135+ return 1
135136 fi
136137}
137138
# Delete one secret chunk, retrying on failure.
# Args:
#   $1: index of the chunk, passed through to delete_secret_value
# Globals:
#   MAX_RETRIES: maximum number of attempts
#   RETRY_DELAY: seconds to wait between two attempts
# Returns:
#   0 as soon as an attempt succeeds, 1 once every attempt has failed
retry_delete_secret_value() {
  local attempts=0
  local return_code
  while [ "${attempts}" -lt "${MAX_RETRIES}" ]; do
    delete_secret_value "$1"
    return_code=$?
    if [ "${return_code}" -eq 0 ]; then
      return 0
    fi
    # Plain arithmetic assignment: unlike ((attempts++)) it never yields a
    # non-zero status, so it cannot abort the script under errexit when the
    # counter is still 0.
    attempts=$((attempts + 1))
    # Only announce and wait when another attempt will actually follow.
    if [ "${attempts}" -lt "${MAX_RETRIES}" ]; then
      log::info "Retrying in ${RETRY_DELAY} seconds..."
      sleep "${RETRY_DELAY}"
    fi
  done
  return 1
}
143154
144155get_secret_value() {
@@ -159,18 +170,33 @@ get_secret_value() {
159170 )
160171 local get_return=$?
161172 check_aws_command "SecretsManager::GetSecretValue" "${get_return}" "${data}"
173+ if [ ${get_return} -ne 0 ]; then
174+ log::error "could not get secret value"
175+ return 1
176+ fi
162177 set -o errexit
163178 set -o nounset
164179 set -o pipefail
165- if [ ${get_return} -ne 0 ]; then
166- log::error "could not get secret value, deleting secret"
167- delete_secrets
168- log::error_exit "could not get secret value, but secret was deleted" 1
169- fi
170180 log::info "appending data to temporary file ${FILE}.gz"
171181 echo "${data}" | base64 -d >>${FILE}.gz
172182}
173183
# Fetch one secret chunk, retrying on failure.
# Args:
#   $1: index of the chunk, passed through to get_secret_value
# Globals:
#   MAX_RETRIES: maximum number of attempts
#   RETRY_DELAY: seconds to wait between two attempts
# Returns:
#   0 as soon as an attempt succeeds, 1 once every attempt has failed
retry_get_secret_value() {
  local attempts=0
  local return_code
  while [ "${attempts}" -lt "${MAX_RETRIES}" ]; do
    get_secret_value "$1"
    return_code=$?
    if [ "${return_code}" -eq 0 ]; then
      return 0
    fi
    # Plain arithmetic assignment: unlike ((attempts++)) it never yields a
    # non-zero status, so it cannot abort the script under errexit when the
    # counter is still 0.
    attempts=$((attempts + 1))
    # Only announce and wait when another attempt will actually follow.
    if [ "${attempts}" -lt "${MAX_RETRIES}" ]; then
      log::info "Retrying in ${RETRY_DELAY} seconds..."
      sleep "${RETRY_DELAY}"
    fi
  done
  return 1
}
199+
174200log::info "aws.cluster.x-k8s.io encrypted cloud-init script $0 started"
175201log::info "secret prefix: ${SECRET_PREFIX}"
176202log::info "secret count: ${CHUNKS}"
@@ -181,10 +207,21 @@ if test -f "${FILE}"; then
181207fi
182208
# Download every chunk in order; get_secret_value appends each one to
# ${FILE}.gz. Stop at the first chunk that cannot be fetched: the archive
# would be incomplete, so fetching the remaining chunks is pointless.
fetch_failed=0
for i in $(seq 0 "${FINAL_INDEX}"); do
  retry_get_secret_value "$i"
  return_code=$?
  if [ "${return_code}" -ne 0 ]; then
    log::error "Failed to get secret value after ${MAX_RETRIES} attempts"
    fetch_failed=1
    break
  fi
done

# The secrets are removed whether or not the download succeeded.
for i in $(seq 0 "${FINAL_INDEX}"); do
  retry_delete_secret_value "$i"
  return_code=$?
  if [ "${return_code}" -ne 0 ]; then
    log::error "Failed to delete secret value after ${MAX_RETRIES} attempts"
    log::error_exit "couldn't delete the secret value, exiting" 1
  fi
done

# Abort before the decompression step when a chunk is missing, instead of
# carrying on with a truncated ${FILE}.gz.
if [ "${fetch_failed}" -ne 0 ]; then
  log::error_exit "could not get secret value, but secret was deleted" 1
fi
188225
189226log::info "decompressing userdata to ${FILE}"
190227gunzip "${FILE}.gz"
0 commit comments