Skip to content

Commit 917e498

Browse files
committed
Update environment and MPI call for GCPBackend
1 parent 6a431fd commit 917e498

File tree

3 files changed

+31
-7
lines changed

3 files changed

+31
-7
lines changed

.buildkite/clima_gpu_pipeline.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ env:
10 10
OMPI_MCA_opal_warn_on_missing_libcuda: 0
11 11
SLURM_KILL_BAD_EXIT: 1
12 12
SLURM_GRES_FLAGS: "allow-task-sharing"
13 -
JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/default"
14 13

15 14
steps:
16 15
- label: "init :computer:"

src/backends.jl

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,36 @@ function module_load_string(::DerechoBackend)
186 186
end
187 187

188 188
function module_load_string(::GCPBackend)
189-
return """export OPAL_PREFIX="/sw/openmpi-5.0.5"
190-
export PATH="/sw/openmpi-5.0.5/bin:\$PATH"
191-
export LD_LIBRARY_PATH="/sw/openmpi-5.0.5/lib:\$LD_LIBRARY_PATH"
192-
export UCX_MEMTYPE_CACHE=y # UCX Memory optimization which toggles whether UCX library intercepts cu*alloc* calls
189+
return """
190+
unset CUDA_ROOT
191+
unset NVHPC_CUDA_HOME
192+
unset CUDA_INC_DIR
193+
unset CPATH
194+
unset NVHPC_ROOT
195+
196+
# NVHPC and HPC-X paths
197+
export NVHPC=/sw/nvhpc/Linux_x86_64/24.5
198+
export HPCX_PATH=\${NVHPC}/comm_libs/12.4/hpcx/hpcx-2.19
199+
200+
# CUDA environment
201+
export CUDA_HOME=\${NVHPC}/cuda/12.4
202+
export CUDA_PATH=\${CUDA_HOME}
203+
export CUDA_ROOT=\${CUDA_HOME}
204+
205+
# MPI via MPIwrapper
206+
export MPITRAMPOLINE_LIB="/sw/mpiwrapper/lib/libmpiwrapper.so"
207+
export OPAL_PREFIX=\${HPCX_PATH}/ompi
208+
209+
# Library paths - CUDA first, then HPC-X
210+
export LD_LIBRARY_PATH="\${CUDA_HOME}/lib64:\${HPCX_PATH}/ompi/lib\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}"
211+
212+
# Executable paths
213+
export PATH=/sw/mpiwrapper/bin:\${CUDA_HOME}/bin:\${PATH}
214+
export PATH="\${NVHPC}/profilers/Nsight_Systems/target-linux-x64:\${PATH}"
215+
216+
# Julia
217+
export PATH="/sw/julia/julia-1.11.5/bin:\${PATH}"
218+
export JULIA_MPI_HAS_CUDA=true
193 219
"""
194 220
end
195 221

src/slurm.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,7 @@ function generate_sbatch_script(
212 212
climacomms_device = gpus_per_task > 0 ? "CUDA" : "CPU"
213 213
# TODO: Remove this exception for GCP
214 214
mpiexec_string =
215-
get_backend() == GCPBackend ?
216-
"/sw/openmpi-5.0.5/bin/mpiexec -n $ntasks" :
215+
get_backend() == GCPBackend ? "mpiexec -n $ntasks" :
217 216
"srun --output=$member_log --open-mode=append"
218 217
sbatch_contents = """
219 218
#!/bin/bash

0 commit comments

Comments
 (0)