Skip to content

Commit 5f76587

Browse files
deliahu and vishalbollu
authored and committed
Update cluster installation and update logs (#609)
(cherry picked from commit 79dd354)
1 parent 10471e2 commit 5f76587

File tree

1 file changed

+31
-52
lines changed

1 file changed

+31
-52
lines changed

manager/install.sh

Lines changed: 31 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,6 @@ function ensure_eks() {
5353
envsubst < eks.yaml | eksctl create cluster -f -
5454
fi
5555
fi
56-
echo -e "\n✓ spun up the cluster"
5756

5857
if [ "$CORTEX_SPOT" == "True" ]; then
5958
asg_info=$(aws autoscaling describe-auto-scaling-groups --region $CORTEX_REGION --query 'AutoScalingGroups[?contains(Tags[?Key==`alpha.eksctl.io/nodegroup-name`].Value, `ng-cortex-worker`)]')
@@ -82,20 +81,20 @@ function ensure_eks() {
8281
exit 1
8382
fi
8483

85-
echo "✓ cluster is running"
86-
8784
# Check for change in min/max instances
8885
asg_info=$(aws autoscaling describe-auto-scaling-groups --region $CORTEX_REGION --query 'AutoScalingGroups[?contains(Tags[?Key==`alpha.eksctl.io/nodegroup-name`].Value, `ng-cortex-worker`)]')
8986
asg_name=$(echo "$asg_info" | jq -r 'first | .AutoScalingGroupName')
9087
asg_min_size=$(echo "$asg_info" | jq -r 'first | .MinSize')
9188
asg_max_size=$(echo "$asg_info" | jq -r 'first | .MaxSize')
9289
if [ "$asg_min_size" != "$CORTEX_MIN_INSTANCES" ]; then
90+
echo -n "○ updating min instances to $CORTEX_MIN_INSTANCES "
9391
aws autoscaling update-auto-scaling-group --region $CORTEX_REGION --auto-scaling-group-name $asg_name --min-size=$CORTEX_MIN_INSTANCES
94-
echo " updated min instances to $CORTEX_MIN_INSTANCES"
92+
echo ""
9593
fi
9694
if [ "$asg_max_size" != "$CORTEX_MAX_INSTANCES" ]; then
95+
echo -n "○ updating max instances to $CORTEX_MAX_INSTANCES "
9796
aws autoscaling update-auto-scaling-group --region $CORTEX_REGION --auto-scaling-group-name $asg_name --max-size=$CORTEX_MAX_INSTANCES
98-
echo " updated max instances to $CORTEX_MAX_INSTANCES"
97+
echo ""
9998
fi
10099
}
101100

@@ -107,82 +106,61 @@ function main() {
107106
setup_bucket
108107
setup_cloudwatch_logs
109108

109+
echo -n "○ updating cluster configuration "
110110
envsubst < manifests/namespace.yaml | kubectl apply -f - >/dev/null
111-
112111
setup_configmap
113112
setup_secrets
114-
echo " updated cluster configuration"
113+
echo ""
115114

116115
echo -n "○ configuring networking "
117116
setup_istio
118117
envsubst < manifests/apis.yaml | kubectl apply -f - >/dev/null
119-
echo -e "\n✓ configured networking"
118+
echo ""
120119

120+
echo -n "○ configuring autoscaling "
121121
envsubst < manifests/cluster-autoscaler.yaml | kubectl apply -f - >/dev/null
122-
echo " configured autoscaling"
122+
echo ""
123123

124+
echo -n "○ configuring logging "
124125
kubectl -n=cortex delete --ignore-not-found=true daemonset fluentd >/dev/null 2>&1 # Pods in DaemonSets cannot be modified
125-
if [ "$(kubectl -n=cortex get pods -l app=fluentd -o json | jq -j '.items | length')" -ne "0" ]; then
126-
echo -n "○ configuring logging "
127-
until [ "$(kubectl -n=cortex get pods -l app=fluentd -o json | jq -j '.items | length')" -eq "0" ]; do
128-
echo -n "."
129-
sleep 2
130-
done
131-
echo
132-
fi
126+
until [ "$(kubectl -n=cortex get pods -l app=fluentd -o json | jq -j '.items | length')" -eq "0" ]; do echo -n "."; sleep 2; done
133127
envsubst < manifests/fluentd.yaml | kubectl apply -f - >/dev/null
134-
echo " configured logging"
128+
echo ""
135129

130+
echo -n "○ configuring metrics "
136131
kubectl -n=cortex delete --ignore-not-found=true daemonset cloudwatch-agent-statsd >/dev/null 2>&1 # Pods in DaemonSets cannot be modified
137-
if [ "$(kubectl -n=cortex get pods -l name=cloudwatch-agent-statsd -o json | jq -j '.items | length')" -ne "0" ]; then
138-
echo -n "○ configuring metrics "
139-
until [ "$(kubectl -n=cortex get pods -l name=cloudwatch-agent-statsd -o json | jq -j '.items | length')" -eq "0" ]; do
140-
echo -n "."
141-
sleep 2
142-
done
143-
echo
144-
fi
132+
until [ "$(kubectl -n=cortex get pods -l name=cloudwatch-agent-statsd -o json | jq -j '.items | length')" -eq "0" ]; do echo -n "."; sleep 2; done
145133
envsubst < manifests/metrics-server.yaml | kubectl apply -f - >/dev/null
146134
envsubst < manifests/statsd.yaml | kubectl apply -f - >/dev/null
147-
echo " configured metrics"
135+
echo ""
148136

149137
if [[ "$CORTEX_INSTANCE_TYPE" == p* ]] || [[ "$CORTEX_INSTANCE_TYPE" == g* ]]; then
138+
echo -n "○ configuring gpu support "
150139
kubectl -n=kube-system delete --ignore-not-found=true daemonset nvidia-device-plugin-daemonset >/dev/null 2>&1 # Pods in DaemonSets cannot be modified
151-
if [ "$(kubectl -n=kube-system get pods -l name=nvidia-device-plugin-ds -o json | jq -j '.items | length')" -ne "0" ]; then
152-
echo -n "○ configuring gpu support "
153-
until [ "$(kubectl -n=kube-system get pods -l name=nvidia-device-plugin-ds -o json | jq -j '.items | length')" -eq "0" ]; do
154-
echo -n "."
155-
sleep 2
156-
done
157-
echo
158-
fi
140+
until [ "$(kubectl -n=kube-system get pods -l name=nvidia-device-plugin-ds -o json | jq -j '.items | length')" -eq "0" ]; do echo -n "."; sleep 2; done
159141
envsubst < manifests/nvidia.yaml | kubectl apply -f - >/dev/null
160-
echo " configured gpu support"
142+
echo ""
161143
fi
162144

145+
echo -n "○ starting operator "
163146
kubectl -n=cortex delete --ignore-not-found=true --grace-period=10 deployment operator >/dev/null 2>&1
164-
if [ "$(kubectl -n=cortex get pods -l workloadID=operator -o json | jq -j '.items | length')" -ne "0" ]; then
165-
echo -n "○ starting operator "
166-
until [ "$(kubectl -n=cortex get pods -l workloadID=operator -o json | jq -j '.items | length')" -eq "0" ]; do
167-
echo -n "."
168-
sleep 2
169-
done
170-
echo
171-
fi
147+
until [ "$(kubectl -n=cortex get pods -l workloadID=operator -o json | jq -j '.items | length')" -eq "0" ]; do echo -n "."; sleep 2; done
172148
envsubst < manifests/operator.yaml | kubectl apply -f - >/dev/null
173-
echo " started operator"
149+
echo ""
174150

175151
validate_cortex
176152

153+
echo -n "○ configuring cli "
177154
echo "{\"cortex_url\": \"$operator_endpoint\", \"aws_access_key_id\": \"$CORTEX_AWS_ACCESS_KEY_ID\", \"aws_secret_access_key\": \"$CORTEX_AWS_SECRET_ACCESS_KEY\"}" > /.cortex/default.json
178-
echo " configured cli"
155+
echo ""
179156

180-
echo -e "\n✓ cortex is ready!"
157+
echo -e "\ncortex is ready!"
181158
}
182159

183160
function setup_bucket() {
184161
if ! aws s3api head-bucket --bucket $CORTEX_BUCKET --output json 2>/dev/null; then
185162
if aws s3 ls "s3://$CORTEX_BUCKET" --output json 2>&1 | grep -q 'NoSuchBucket'; then
163+
echo -n "○ creating s3 bucket: $CORTEX_BUCKET "
186164
if [ "$CORTEX_REGION" == "us-east-1" ]; then
187165
aws s3api create-bucket --bucket $CORTEX_BUCKET \
188166
--region $CORTEX_REGION \
@@ -193,22 +171,23 @@ function setup_bucket() {
193171
--create-bucket-configuration LocationConstraint=$CORTEX_REGION \
194172
>/dev/null
195173
fi
196-
echo " created s3 bucket: $CORTEX_BUCKET"
174+
echo ""
197175
else
198176
echo "error: a bucket named \"${CORTEX_BUCKET}\" already exists, but you do not have access to it"
199177
exit 1
200178
fi
201179
else
202-
echo " using existing s3 bucket: $CORTEX_BUCKET"
180+
echo " using existing s3 bucket: $CORTEX_BUCKET"
203181
fi
204182
}
205183

206184
function setup_cloudwatch_logs() {
207185
if ! aws logs list-tags-log-group --log-group-name $CORTEX_LOG_GROUP --region $CORTEX_REGION --output json 2>&1 | grep -q "\"tags\":"; then
186+
echo -n "○ creating cloudwatch log group: $CORTEX_LOG_GROUP "
208187
aws logs create-log-group --log-group-name $CORTEX_LOG_GROUP --region $CORTEX_REGION
209-
echo " created cloudwatch log group: $CORTEX_LOG_GROUP"
188+
echo ""
210189
else
211-
echo " using existing cloudwatch log group: $CORTEX_LOG_GROUP"
190+
echo " using existing cloudwatch log group: $CORTEX_LOG_GROUP"
212191
fi
213192
}
214193

@@ -324,7 +303,7 @@ function validate_cortex() {
324303
break
325304
done
326305

327-
echo -e "\n✓ load balancers are ready"
306+
echo ""
328307
}
329308

330309
main

0 commit comments

Comments
 (0)