Skip to content

Commit 7ca5fc8

Browse files
authored
Recreate kind cluster on every evg host reboot (#32)
# Summary Evergreen hosts reboot every day or every weekend (depending on your configuration). After every reboot, inter-cluster connectivity might be broken for some reason. Recreating the clusters is the only solution we have so far. This change adds a systemd service that runs on every boot and recreates all clusters (including the kind-kind cluster used for single-cluster tests). Our tunnel command will now also get the kubeconfig from the host; otherwise the tunnel won't open to the (new) ports of the recreated clusters. The systemd service is not created by default. This needs to be explicitly done by running ``` evg_host.sh configure --auto-recreate ``` This change also removes the architecture flag and instead leaves the architecture to be inferred from `uname`. ## Proof of Work Tested locally, but it would be nice if someone could check out this branch and try it for themselves. ## Checklist - [ ] Have you linked a jira ticket and/or is the ticket in the title? - [ ] Have you checked whether your jira ticket requires DOCSP changes? - [ ] Have you checked for release_note changes?
1 parent 7f864b2 commit 7ca5fc8

File tree

5 files changed

+65
-21
lines changed

5 files changed

+65
-21
lines changed

scripts/dev/evg_host.sh

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,24 @@ kubeconfig_path="${HOME}/.operator-dev/evg-host.kubeconfig"
3939

4040
configure() {
4141
shift 1
42-
arch=${1-"amd64"}
42+
auto_recreate="false"
43+
44+
# Parse arguments
45+
while [[ $# -gt 0 ]]; do
46+
case $1 in
47+
--auto-recreate)
48+
auto_recreate="true"
49+
shift
50+
;;
51+
*)
52+
echo "Unknown argument: $1"
53+
echo "Usage: configure [--auto-recreate]"
54+
exit 1
55+
;;
56+
esac
57+
done
4358

44-
echo "Configuring EVG host ${EVG_HOST_NAME} (${host_url}) with architecture ${arch}"
45-
46-
if [[ "${cmd}" == "configure" && "${arch}" != "amd64" && "${arch}" != "arm64" ]]; then
47-
echo "'configure' command supports the following architectures: 'amd64', 'arm64'"
48-
exit 1
49-
fi
59+
echo "Configuring EVG host ${EVG_HOST_NAME} (${host_url}) (auto_recreate: ${auto_recreate})"
5060

5161
ssh -T -q "${host_url}" "sudo chown ubuntu:ubuntu ~/.docker || true; mkdir -p ~/.docker"
5262
if [[ -f "${HOME}/.docker/config.json" ]]; then
@@ -56,7 +66,7 @@ configure() {
5666

5767
sync
5868

59-
ssh -T -q "${host_url}" "cd ~/mongodb-kubernetes; scripts/dev/switch_context.sh root-context; scripts/dev/setup_evg_host.sh ${arch}"
69+
ssh -T -q "${host_url}" "cd ~/mongodb-kubernetes; scripts/dev/switch_context.sh root-context; scripts/dev/setup_evg_host.sh ${auto_recreate}"
6070
}
6171

6272
sync() {
@@ -100,7 +110,7 @@ get-kubeconfig() {
100110

101111
recreate-kind-clusters() {
102112
DELETE_KIND_NETWORK=${DELETE_KIND_NETWORK:-"false"}
103-
configure "${1-"amd64"}" 2>&1| prepend "evg_host.sh configure"
113+
configure 2>&1| prepend "evg_host.sh configure"
104114
echo "Recreating kind clusters on ${EVG_HOST_NAME} (${host_url})..."
105115
# shellcheck disable=SC2088
106116
ssh -T "${host_url}" "cd ~/mongodb-kubernetes; DELETE_KIND_NETWORK=${DELETE_KIND_NETWORK} scripts/dev/recreate_kind_clusters.sh"
@@ -111,7 +121,7 @@ recreate-kind-clusters() {
111121
recreate-kind-cluster() {
112122
shift 1
113123
cluster_name=$1
114-
configure "${1-"amd64"}" 2>&1| prepend "evg_host.sh configure"
124+
configure 2>&1| prepend "evg_host.sh configure"
115125
echo "Recreating kind cluster ${cluster_name} on ${EVG_HOST_NAME} (${host_url})..."
116126
# shellcheck disable=SC2088
117127
ssh -T "${host_url}" "cd ~/mongodb-kubernetes; scripts/dev/recreate_kind_cluster.sh ${cluster_name}"
@@ -121,6 +131,7 @@ recreate-kind-cluster() {
121131

122132
tunnel() {
123133
shift 1
134+
get-kubeconfig
124135
# shellcheck disable=SC2016
125136
api_servers=$(yq '.contexts[].context.cluster as $cluster | .clusters[] | select(.name == $cluster).cluster.server' < "${kubeconfig_path}" | sed 's/https:\/\///g')
126137
echo "Extracted the following API server urls from ${kubeconfig_path}: ${api_servers}"
@@ -187,7 +198,7 @@ PREREQUISITES:
187198
188199
COMMANDS:
189200
recreate-kind-clusters all-you-need to configure host and kind clusters; deletes and recreates all kind clusters (for single and multi runs)
190-
configure <architecture> installs on a host: calls sync, switches context, installs necessary software
201+
configure [--auto-recreate] installs on a host: calls sync, switches context, installs necessary software
191202
sync rsync of project directory
192203
recreate-kind-cluster test-cluster executes scripts/dev/recreate_kind_cluster.sh test-cluster and executes get-kubeconfig
193204
remote-prepare-local-e2e-run executes prepare-local-e2e on the remote evg host
@@ -202,7 +213,7 @@ COMMANDS:
202213

203214
case ${cmd} in
204215
configure) configure "$@" ;;
205-
recreate-kind-clusters) recreate-kind-clusters "${1-"amd64"}";;
216+
recreate-kind-clusters) recreate-kind-clusters "$@" ;;
206217
recreate-kind-cluster) recreate-kind-cluster "$@" ;;
207218
get-kubeconfig) get-kubeconfig ;;
208219
remote-prepare-local-e2e-run) remote-prepare-local-e2e-run ;;

scripts/dev/kindclusters.service

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[Unit]
2+
Description=Recreate kind clusters at boot
3+
After=docker.service
4+
Requires=docker.service
5+
6+
[Service]
7+
Type=oneshot
8+
WorkingDirectory=/home/ubuntu/mongodb-kubernetes
9+
Environment=HOME=/home/ubuntu
10+
ExecStart=/home/ubuntu/mongodb-kubernetes/scripts/dev/recreate_kind_clusters.sh
11+
RemainAfterExit=yes
12+
13+
[Install]
14+
WantedBy=multi-user.target

scripts/dev/setup_evg_host.sh

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
set -Eeou pipefail
66

7+
source scripts/funcs/install
8+
79
set_limits() {
810
echo "Increasing fs.inotify.max_user_instances"
911
sudo sysctl -w fs.inotify.max_user_instances=8192
@@ -26,16 +28,26 @@ set_limits() {
2628
EOF
2729
}
2830

29-
# retrieve arch variable off the shell command line
30-
ARCH=${1-"amd64"}
31+
set_auto_recreate() {
32+
echo "Creating systemd service for recreating kind clusters on reboot"
33+
34+
sudo cp /home/ubuntu/mongodb-kubernetes/scripts/dev/kindclusters.service /etc/systemd/system/kindclusters.service
35+
sudo systemctl enable kindclusters.service
36+
}
37+
38+
# Detect architecture from the environment
39+
ARCH=$(detect_architecture)
40+
echo "Detected architecture: ${ARCH}"
3141

3242
download_kind() {
3343
scripts/evergreen/setup_kind.sh /usr/local
3444
}
3545

36-
download_curl() {
37-
echo "Downloading curl..."
38-
curl -s -o kubectl -L https://dl.k8s.io/release/"$(curl -L -s https://dl.k8s.io/release/stable.txt)"/bin/linux/"${ARCH}"/kubectl
46+
download_kubectl() {
47+
kubectl_version=$(curl --retry 5 -Ls https://dl.k8s.io/release/stable.txt)
48+
echo "Downloading kubectl ${kubectl_version}..."
49+
50+
curl --retry 5 -LOs "https://dl.k8s.io/release/${kubectl_version}/bin/linux/${ARCH}/kubectl"
3951
chmod +x kubectl
4052
sudo mv kubectl /usr/local/bin/kubectl
4153
}
@@ -51,7 +63,12 @@ download_helm() {
5163

5264
set_limits
5365
download_kind &
54-
download_curl &
66+
download_kubectl &
5567
download_helm &
5668

69+
AUTO_RECREATE=${1:-false}
70+
if [[ "${AUTO_RECREATE}" == "true" ]]; then
71+
set_auto_recreate &
72+
fi
73+
5774
wait

scripts/evergreen/setup_kind.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ latest_version="v0.29.0"
1515
if [[ "${arch_suffix}" == "amd64" || "${arch_suffix}" == "arm64" ]]; then
1616
mkdir -p "${PROJECT_DIR}/bin/"
1717
echo "Saving kind to ${PROJECT_DIR}/bin"
18-
curl --retry 3 --silent -L "https://github.com/kubernetes-sigs/kind/releases/download/${latest_version}/kind-${os}-${arch_suffix}" -o kind
18+
curl --retry 5 -L "https://github.com/kubernetes-sigs/kind/releases/download/${latest_version}/kind-${os}-${arch_suffix}" -o kind
1919

2020
chmod +x kind
2121
sudo mv kind "${PROJECT_DIR}/bin"

scripts/evergreen/setup_kubectl.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@ bindir="${PROJECT_DIR}/bin"
1212
tmpdir="${PROJECT_DIR}/tmp"
1313
mkdir -p "${bindir}" "${tmpdir}"
1414

15-
echo "Downloading latest kubectl for ${ARCH}"
16-
curl -s --retry 3 -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl"
15+
kubectl_version=$(curl --retry 5 -Ls https://dl.k8s.io/release/stable.txt)
16+
echo "Downloading kubectl ${kubectl_version} for ${ARCH}"
17+
18+
curl --retry 5 -LOs "https://dl.k8s.io/release/${kubectl_version}/bin/linux/${ARCH}/kubectl"
1719
chmod +x kubectl
1820
echo "kubectl version --client"
1921
./kubectl version --client

0 commit comments

Comments
 (0)