@@ -133,33 +133,12 @@ jobs:
       # region Free up disk space
 
       - name: Free up additional disk space
+        uses: ./.github/actions/free-up-disk-space
         # https://docs.github.com/en/actions/learn-github-actions/expressions
         # NOTE: the arm64 GitHub hosted runner does not have the /mnt-mounted scratch disk
         if: "${{ contains(inputs.target, 'rocm') || contains(inputs.target, 'cuda') ||
           contains(inputs.target, 'pytorch') || contains(inputs.target, 'tensorflow') ||
           inputs.platform == 'linux/arm64' }}"
-        run: |
-          set -x
-
-          df -h
-
-          sudo apt-get update
-          sudo apt-get purge -y '^dotnet-.*' '^llvm-.*' 'php.*' '^mongodb-.*'
-          sudo apt-get autoremove -y --purge
-          sudo apt-get clean
-          sudo rm -rf /usr/local/.ghcup &
-          sudo rm -rf /usr/local/lib/android &
-          sudo rm -rf /usr/local/share/boost &
-          sudo rm -rf /usr/local/lib/node_modules &
-          sudo rm -rf /usr/share/dotnet &
-          sudo rm -rf /opt/ghc &
-          sudo rm -rf /opt/hostedtoolcache/CodeQL &
-
-          sudo docker image prune --all --force &
-
-          wait
-
-          df -h
 
       - id: install-compsize
         run: sudo apt-get install -y btrfs-compsize
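The removed `run:` block moves into a local composite action, and the workflow now just calls it with `uses:` while keeping the `if:` guard at the call site. The new action file is not part of this diff; below is a minimal sketch of what `.github/actions/free-up-disk-space/action.yml` plausibly contains, reconstructed from the removed step (the `name`, `description`, and `shell: bash` wiring are assumptions):

```yaml
# .github/actions/free-up-disk-space/action.yml
# Hypothetical reconstruction; the actual file is not shown in this diff.
name: Free up disk space
description: Purge preinstalled toolchains and images from the hosted runner
runs:
  using: composite
  steps:
    - shell: bash
      run: |
        set -x
        df -h
        sudo apt-get update
        sudo apt-get purge -y '^dotnet-.*' '^llvm-.*' 'php.*' '^mongodb-.*'
        sudo apt-get autoremove -y --purge
        sudo apt-get clean
        # The deletions are independent, so each runs in the background and
        # `wait` joins them; the step takes roughly as long as the slowest
        # single removal.
        sudo rm -rf /usr/local/.ghcup &
        sudo rm -rf /usr/local/lib/android &
        sudo rm -rf /usr/local/share/boost &
        sudo rm -rf /usr/local/lib/node_modules &
        sudo rm -rf /usr/share/dotnet &
        sudo rm -rf /opt/ghc &
        sudo rm -rf /opt/hostedtoolcache/CodeQL &
        sudo docker image prune --all --force &
        wait
        df -h
```

Note that the `if:` condition stays in the workflow: a composite action has no top-level conditional of its own, so the caller remains the right place to skip the step for targets that do not need the extra space.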
@@ -398,164 +377,9 @@ jobs:
           ln -s ../rocm-tensorflow runtimes/rocm/tensorflow
           ln -s ../rocm-pytorch runtimes/rocm/pytorch
 
-      # https://cri-o.io/
-      - name: Install cri-o
-        id: install-crio
-        if: ${{ steps.have-tests.outputs.tests == 'true' }}
-        run: |
-          set -Eeuxo pipefail
-
-          # the Microsoft repo's kubelet does not provide /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
-          # [Service]
-          # EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
-          # ExecStart=/usr/bin/kubelet $KUBELET_KUBEADM_ARGS
-          sudo ls /etc/apt/sources.list.d/
-          sudo rm /etc/apt/sources.list.d/microsoft-prod.list
-
-          sudo apt-get update
-          sudo apt-get install -y software-properties-common curl
-
-          # https://github.com/cri-o/packaging?tab=readme-ov-file#distributions-using-deb-packages
-
-          curl -fsSL https://pkgs.k8s.io/core:/stable:/v${KUBERNETES_VERSION}/deb/Release.key | \
-            sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
-
-          echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v${KUBERNETES_VERSION}/deb/ /" | \
-            sudo tee /etc/apt/sources.list.d/kubernetes.list
-
-          curl -fsSL https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v${CRIO_VERSION}/deb/Release.key | \
-            sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/cri-o-apt-keyring.gpg
-
-          echo "deb [signed-by=/etc/apt/keyrings/cri-o-apt-keyring.gpg] https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v${CRIO_VERSION}/deb/ /" | \
-            sudo tee /etc/apt/sources.list.d/cri-o.list
-
-          sudo apt-get update
-
-          # [ERROR FileExisting-conntrack]: conntrack not found in system path
-          # see man apt-patterns for the ~name=version* syntax
-
-          # The following packages will be DOWNGRADED:
-          #   kubectl
-          # E: Packages were downgraded and -y was used without --allow-downgrades.
-
-          sudo apt-get install -y --allow-downgrades \
-            "cri-o=${CRIO_VERSION}.*" \
-            "kubelet=${KUBERNETES_VERSION}.*" "kubeadm=${KUBERNETES_VERSION}.*" "kubectl=${KUBERNETES_VERSION}.*" \
-            conntrack
-
-          # make use of /etc/cni/net.d/11-crio-ipv4-bridge.conflist so we don't
-          # need a pod network and just use the default bridge
-          sudo rm -rf /etc/cni/net.d/*
-          # cat /etc/cni/net.d/11-crio-ipv4-bridge.conflist
-          # https://github.com/containerd/containerd/blob/main/script%2Fsetup%2Finstall-cni
-          # https://www.cni.dev/plugins/current/main/bridge/
-          sudo cp ci/cached-builds/11-crio-ipv4-bridge.conflist /etc/cni/net.d/11-crio-ipv4-bridge.conflist
-
-          sudo cp ci/cached-builds/crio.conf /etc/crio/crio.conf.d/
-
-          sudo systemctl daemon-reload
-          sudo systemctl start crio.service
-        env:
-          # TODO(jdanek): install also "cri-tools=${CRIO_VERSION}.*" when updating to 1.33
-          CRIO_VERSION: 1.32
-          # This has to be kept in sync with the packages above, otherwise
-          # [ERROR KubeletVersion]: the kubelet version is higher than the control plane version.
-          # This is not a supported version skew and may lead to a malfunctional cluster.
-          # Kubelet version: "1.33.0" Control plane version: "1.30.12"
-          KUBERNETES_VERSION: 1.33
-          # Also update version in kubeadm.yaml
-
-      - run: sudo crictl info
+      - name: Provision K8s cluster
         if: ${{ steps.have-tests.outputs.tests == 'true' }}
-
-      - name: Show crio debug data (on failure)
-        if: ${{ failure() && steps.have-tests.outputs.tests == 'true' }}
-        run: |
-          set -Eeuxo pipefail
-
-          sudo systemctl status crio.service || true
-          sudo journalctl -xeu crio.service
-
-      # do this early, it's a good check that cri-o is not completely broken
-      - name: "Show crio images information"
-        if: ${{ steps.have-tests.outputs.tests == 'true' }}
-        run: sudo crictl images
-
-      - name: Install Kubernetes cluster
-        if: ${{ steps.have-tests.outputs.tests == 'true' }}
-        run: |
-          set -Eeuxo pipefail
-
-          sudo swapoff -a
-          sudo modprobe br_netfilter
-          sudo sysctl -w net.ipv4.ip_forward=1
-
-          # Was getting strange DNS resolution errors from pods that don't seem to want to go away sometimes:
-          #  Resolving raw.githubusercontent.com (raw.githubusercontent.com)... failed: Name or service not known.
-          #  wget: unable to resolve host address ‘raw.githubusercontent.com’
-          # Here's what helped:
-          #  https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/#known-issues
-          #  https://github.com/kubernetes/kubernetes/blob/e4c1f980b76fecece30c2f77885a7117192170a6/CHANGELOG/CHANGELOG-1.30.md?plain=1#L1454
-          #  https://github.com/canonical/microk8s/issues/68#issuecomment-404923563
-          sudo ufw allow in on cni0
-          sudo ufw allow out on cni0
-          sudo ufw default allow routed
-          sudo iptables -P FORWARD ACCEPT
-          sudo iptables -t nat -A POSTROUTING -s 10.85.0.0/16 -o eth0 -j MASQUERADE
-
-          sudo kubeadm reset -f --cri-socket=unix:///var/run/crio/crio.sock
-
-          # https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm
-          sudo kubeadm init --config=ci/cached-builds/kubeadm.yaml
-
-          mkdir -p $HOME/.kube
-          sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
-          sudo chown $(id -u):$(id -g) $HOME/.kube/config
-
-      - name: Show kubelet debug data (on failure)
-        if: ${{ failure() && steps.have-tests.outputs.tests == 'true' && steps.install-crio.outcome == 'success' }}
-        run: |
-          set -Eeuxo pipefail
-
-          # [kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
-          sudo cat /var/lib/kubelet/kubeadm-flags.env || true
-          # [kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
-          sudo cat /var/lib/kubelet/config.yaml || true
-
-          sudo systemctl cat kubelet.service || true
-
-          sudo cat /etc/systemd/system/kubelet.service.d/10-kubeadm.conf || true
-
-          sudo systemctl status kubelet || true
-          sudo journalctl -xeu kubelet
-
-          # Here is one example how you may list all running Kubernetes containers by using crictl:
-          sudo crictl --runtime-endpoint unix:///var/run/crio/crio.sock ps -a | grep kube | grep -v pause
-          # Once you have found the failing container, you can inspect its logs with:
-          #  crictl --runtime-endpoint unix:///var/run/crio/crio.sock logs CONTAINERID
-
-      - name: Show nodes status and wait for readiness
-        if: ${{ steps.have-tests.outputs.tests == 'true' }}
-        run: |
-          kubectl describe nodes
-          kubectl wait --for=condition=Ready nodes --all --timeout=100s || (kubectl describe nodes && false)
-
-      - name: Wait for pods to be running
-        if: ${{ steps.have-tests.outputs.tests == 'true' }}
-        run: |
-          set -Eeuxo pipefail
-          kubectl wait deployments --all --all-namespaces --for=condition=Available --timeout=100s
-          kubectl wait pods --all --all-namespaces --for=condition=Ready --timeout=100s
-
-      - name: "Install local-path provisioner"
-        if: ${{ steps.have-tests.outputs.tests == 'true' }}
-        run: |
-          set -Eeuxo pipefail
-          kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.31/deploy/local-path-storage.yaml
-          kubectl wait deployments --all --namespace=local-path-storage --for=condition=Available --timeout=100s
-          # https://kubernetes.io/docs/tasks/administer-cluster/change-default-storage-class/
-          kubectl get storageclass
-          kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
+        uses: ./.github/actions/provision-k8s
 
       - name: "Run image tests"
         # skip on s390x because we are unable to install requirements-elyra.txt that's installed by runtime image tests
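The same refactor collapses roughly 150 lines of cluster bootstrap (cri-o and kubeadm installation, `kubeadm init`, readiness waits, local-path provisioner) into the single `uses: ./.github/actions/provision-k8s` call above. The action file is again absent from this diff; here is a compressed sketch of the shape it plausibly takes, reusing commands from the removed steps (the file path, step granularity, and everything elided in comments are assumptions):

```yaml
# .github/actions/provision-k8s/action.yml
# Hypothetical reconstruction; the actual file is not shown in this diff.
name: Provision K8s cluster
description: Bring up a single-node Kubernetes cluster on cri-o with kubeadm
runs:
  using: composite
  steps:
    - name: Install cri-o
      shell: bash
      env:
        CRIO_VERSION: "1.32"
        KUBERNETES_VERSION: "1.33"
      run: |
        set -Eeuxo pipefail
        # ...apt repository setup for pkgs.k8s.io and the cri-o OBS repo,
        # as in the removed step, then the version-pinned install:
        sudo apt-get install -y --allow-downgrades \
          "cri-o=${CRIO_VERSION}.*" \
          "kubelet=${KUBERNETES_VERSION}.*" "kubeadm=${KUBERNETES_VERSION}.*" "kubectl=${KUBERNETES_VERSION}.*" \
          conntrack
        sudo cp ci/cached-builds/11-crio-ipv4-bridge.conflist /etc/cni/net.d/
        sudo cp ci/cached-builds/crio.conf /etc/crio/crio.conf.d/
        sudo systemctl daemon-reload
        sudo systemctl start crio.service

    - name: Install Kubernetes cluster
      shell: bash
      run: |
        set -Eeuxo pipefail
        sudo swapoff -a
        sudo modprobe br_netfilter
        sudo sysctl -w net.ipv4.ip_forward=1
        # ...the ufw/iptables DNS workarounds from the removed step go here
        sudo kubeadm init --config=ci/cached-builds/kubeadm.yaml
        mkdir -p "$HOME/.kube"
        sudo cp -i /etc/kubernetes/admin.conf "$HOME/.kube/config"
        sudo chown "$(id -u):$(id -g)" "$HOME/.kube/config"

    - name: Wait for node, pods, and local-path provisioner
      shell: bash
      run: |
        set -Eeuxo pipefail
        kubectl wait --for=condition=Ready nodes --all --timeout=100s
        kubectl wait pods --all --all-namespaces --for=condition=Ready --timeout=100s
        kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.31/deploy/local-path-storage.yaml
        kubectl wait deployments --all --namespace=local-path-storage --for=condition=Available --timeout=100s
        kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
```

One thing the diff alone does not settle: the old `failure()`-guarded debug steps (crio and kubelet status, journals, `crictl ps`) have no counterpart at the call site, so they either moved inside the composite action or were dropped in the refactor.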