@@ -5,14 +5,27 @@ install_cuda_aarch64() {
55 CU_VER=${CU_VERSION: 2: 2} -${CU_VERSION: 4: 1}
66 # CU_VERSION: cu129 --> CU_DOT_VER: 12.9
77 CU_DOT_VER=${CU_VERSION: 2: 2} .${CU_VERSION: 4: 1}
8+ # CU_VERSION: cu128 --> CUDA_MAJOR_VERSION: 12
9+ CUDA_MAJOR_VERSION=${CU_VERSION: 2: 2}
810 dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
911 # nccl version must match the version libtorch_cuda.so was built with; see https://github.com/pytorch/pytorch/blob/main/.ci/docker/ci_commit_pins/nccl-cu12.txt
1012 dnf -y install cuda-compiler-${CU_VER} .aarch64 \
1113 cuda-libraries-${CU_VER} .aarch64 \
1214 cuda-libraries-devel-${CU_VER} .aarch64 \
1315 libnccl-2.27.3-1+cuda${CU_DOT_VER} libnccl-devel-2.27.3-1+cuda${CU_DOT_VER} libnccl-static-2.27.3-1+cuda${CU_DOT_VER}
1416 dnf clean all
15- export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib64:$LD_LIBRARY_PATH
17+
18+ nvshmem_version=3.3.9
19+ nvshmem_path=" https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version} /builds/cuda${CUDA_MAJOR_VERSION} /txz/agnostic/aarch64"
20+ nvshmem_filename=" libnvshmem_cuda12-linux-sbsa-${nvshmem_version} .tar.gz"
21+ curl -L ${nvshmem_path} /${nvshmem_filename} -o nvshmem.tar.gz
22+ tar -xzf nvshmem.tar.gz
23+ cp -a libnvshmem/lib/* /usr/local/cuda/lib64/
24+ cp -a libnvshmem/include/* /usr/local/cuda/include/
25+ rm -rf nvshmem.tar.gz nvshmem
26+ echo " nvshmem ${nvshmem_version} for cuda ${CUDA_MAJOR_VERSION} installed successfully"
27+
28+ export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/include:/usr/lib64:$LD_LIBRARY_PATH
1629 ls -lart /usr/local/
1730 nvcc --version
1831 echo " cuda ${CU_VER} installed successfully"
0 commit comments