Commit 72320b9

Author: yefubao
Message: update CUDA_VERSION to 12.8.1
Parent: bc57c76

File tree: 2 files changed, +18 -14 lines


docker/Dockerfile.cuda

Lines changed: 4 additions & 4 deletions
@@ -1,4 +1,4 @@
-ARG CUDA_VERSION=12.4.1
+ARG CUDA_VERSION=12.8.1
 FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04

 ENV DEBIAN_FRONTEND noninteractive
@@ -110,14 +110,14 @@ RUN cd ${STAGE_DIR} && \
 ##############################################################################
 # Install cuDNN (NVIDIA CUDA Deep Neural Network library)
 ##############################################################################
-ARG CUDNN_VERSION=9.5.0
+ARG CUDNN_VERSION=9.7.1
 RUN cd ${STAGE_DIR} && \
 CUDNN_MAJOR=$(echo "${CUDNN_VERSION}" | cut -d. -f1) && \
 CUDNN_MINOR=$(echo "${CUDNN_VERSION}" | cut -d. -f2) && \
 CUDA_MAJOR=$(echo "${CUDA_VERSION}" | cut -d. -f1) && \
 CUDA_MINOR=$(echo "${CUDA_VERSION}" | cut -d. -f2) && \
-wget https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}.50_cuda${CUDA_MAJOR}-archive.tar.xz && \
-tar -xJvf cudnn-linux-x86_64-${CUDNN_VERSION}.50_cuda12-archive.tar.xz && \
+wget https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}.26_cuda${CUDA_MAJOR}-archive.tar.xz && \
+tar -xJvf cudnn-linux-x86_64-${CUDNN_VERSION}.26_cuda12-archive.tar.xz && \
 mkdir -p /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}/include && \
 mkdir -p /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}/lib64 && \
 cp cudnn-*-archive/include/cudnn*.h /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}/include/ && \
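
Note: the archive filename encodes a four-part cuDNN build number. CUDNN_VERSION supplies the first three parts, while the build suffix (".50" before this commit, ".26" after it) is hard-coded in both the wget and tar lines, so the two have to be bumped together. A minimal sketch (not part of the commit) of how the name resolves with the new values:

    # Sketch only: resolve the cuDNN archive name from the values in this commit.
    CUDA_VERSION=12.8.1
    CUDNN_VERSION=9.7.1
    CUDA_MAJOR=$(echo "${CUDA_VERSION}" | cut -d. -f1)    # -> 12
    echo "cudnn-linux-x86_64-${CUDNN_VERSION}.26_cuda${CUDA_MAJOR}-archive.tar.xz"
    # -> cudnn-linux-x86_64-9.7.1.26_cuda12-archive.tar.xz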

install/install-requirements.sh

Lines changed: 14 additions & 10 deletions
@@ -77,19 +77,23 @@ cd ..
 rm -r ./TransformerEngine

 # cudnn frontend
-pip install nvidia-cudnn-cu12==9.5.0.50
+pip install nvidia-cudnn-cu12==9.7.1.26
 CMAKE_ARGS="-DCMAKE_POLICY_VERSION_MINIMUM=3.5" pip install nvidia-cudnn-frontend
 python -c "import torch; print('cuDNN version:', torch.backends.cudnn.version());"
 python -c "from transformer_engine.pytorch.utils import get_cudnn_version; get_cudnn_version()"

-# Megatron-LM requires flash-attn >= 2.1.1, <= 2.7.3
-cu=$(nvcc --version | grep "Cuda compilation tools" | awk '{print $5}' | cut -d '.' -f 1)
-torch=$(pip show torch | grep Version | awk '{print $2}' | cut -d '+' -f 1 | cut -d '.' -f 1,2)
-cp=$(python3 --version | awk '{print $2}' | awk -F. '{print $1$2}')
-cxx=$(g++ --version | grep 'g++' | awk '{print $3}' | cut -d '.' -f 1)
-wget https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu${cu}torch${torch}cxx${cxx}abiFALSE-cp${cp}-cp${cp}-linux_x86_64.whl
-pip install flash_attn-2.7.3+cu${cu}torch${torch}cxx${cxx}abiFALSE-cp${cp}-cp${cp}-linux_x86_64.whl
-rm flash_attn-2.7.3+cu${cu}torch${torch}cxx${cxx}abiFALSE-cp${cp}-cp${cp}-linux_x86_64.whl
+# # Megatron-LM requires flash-attn >= 2.1.1, <= 2.7.3
+# cu=$(nvcc --version | grep "Cuda compilation tools" | awk '{print $5}' | cut -d '.' -f 1)
+# torch=$(pip show torch | grep Version | awk '{print $2}' | cut -d '+' -f 1 | cut -d '.' -f 1,2)
+# cp=$(python3 --version | awk '{print $2}' | awk -F. '{print $1$2}')
+# cxx=$(g++ --version | grep 'g++' | awk '{print $3}' | cut -d '.' -f 1)
+# wget https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu${cu}torch${torch}cxx${cxx}abiFALSE-cp${cp}-cp${cp}-linux_x86_64.whl
+# pip install flash_attn-2.7.3+cu${cu}torch${torch}cxx${cxx}abiFALSE-cp${cp}-cp${cp}-linux_x86_64.whl
+# rm flash_attn-2.7.3+cu${cu}torch${torch}cxx${cxx}abiFALSE-cp${cp}-cp${cp}-linux_x86_64.whl
+wget https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/flash_attn-2.8.0.post2+cu12torch2.7cxx11abiFALSE-cp312-cp312-linux_x86_64.whl
+pip install flash_attn-2.8.0.post2+cu12torch2.7cxx11abiFALSE-cp312-cp312-linux_x86_64.whl
+rm flash_attn-2.8.0.post2+cu12torch2.7cxx11abiFALSE-cp312-cp312-linux_x86_64.whl
+

 # From Megatron-LM log
 pip install "git+https://github.com/Dao-AILab/flash-attention.git@v2.7.2#egg=flashattn-hopper&subdirectory=hopper"
@@ -153,7 +157,7 @@ if [ "${env}" == "train" ]; then
 fi

 # Replace the following code with torch version 2.6.0
-if [[ $torch_version == *"2.6.0"* ]];then
+if [[ $torch_version == *"2.6.0"* ]] || [[ $torch_version == *"2.7.0"* ]];then
 # Check and replace line 908
 LINE_908=$(sed -n '908p' "$FILE")
 EXPECTED_908=' if num_nodes_waiting > 0:'
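
Note: the guard around the line-908 patch now also matches torch 2.7.0 builds. A minimal sketch (not part of the commit) of how the widened glob match behaves; the version string below is only an example of what pip might report:

    # Sketch only: the same [[ ... == *"x.y.z"* ]] glob test with an example value.
    torch_version="2.7.0+cu128"
    if [[ $torch_version == *"2.6.0"* ]] || [[ $torch_version == *"2.7.0"* ]]; then
        echo "patch will be applied for torch ${torch_version}"
    fi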
