3 files changed: +31 -7 lines changed
lines changed Original file line number Diff line number Diff line change 1010 OMPI_MCA_opal_warn_on_missing_libcuda : 0
1111 SLURM_KILL_BAD_EXIT : 1
1212 SLURM_GRES_FLAGS : " allow-task-sharing"
13- JULIA_DEPOT_PATH : " ${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/default"
1413
1514steps :
1615 - label : " init :computer:"
Original file line number Diff line number Diff line change @@ -186,10 +186,36 @@ function module_load_string(::DerechoBackend)
186186end
187187
188188function module_load_string (:: GCPBackend )
189- return """ export OPAL_PREFIX="/sw/openmpi-5.0.5"
190- export PATH="/sw/openmpi-5.0.5/bin:\$ PATH"
191- export LD_LIBRARY_PATH="/sw/openmpi-5.0.5/lib:\$ LD_LIBRARY_PATH"
192- export UCX_MEMTYPE_CACHE=y # UCX Memory optimization which toggles whether UCX library intercepts cu*alloc* calls
189+ return """
190+ unset CUDA_ROOT
191+ unset NVHPC_CUDA_HOME
192+ unset CUDA_INC_DIR
193+ unset CPATH
194+ unset NVHPC_ROOT
195+
196+ # NVHPC and HPC-X paths
197+ export NVHPC=/sw/nvhpc/Linux_x86_64/24.5
198+ export HPCX_PATH=\$ {NVHPC}/comm_libs/12.4/hpcx/hpcx-2.19
199+
200+ # CUDA environment
201+ export CUDA_HOME=\$ {NVHPC}/cuda/12.4
202+ export CUDA_PATH=\$ {CUDA_HOME}
203+ export CUDA_ROOT=\$ {CUDA_HOME}
204+
205+ # MPI via MPIwrapper
206+ export MPITRAMPOLINE_LIB="/sw/mpiwrapper/lib/libmpiwrapper.so"
207+ export OPAL_PREFIX=\$ {HPCX_PATH}/ompi
208+
209+ # Library paths - CUDA first, then HPC-X
210+ export LD_LIBRARY_PATH="\$ {CUDA_HOME}/lib64:\$ {HPCX_PATH}/ompi/lib\$ {LD_LIBRARY_PATH:+:\$ {LD_LIBRARY_PATH}}"
211+
212+ # Executable paths
213+ export PATH=/sw/mpiwrapper/bin:\$ {CUDA_HOME}/bin:\$ {PATH}
214+ export PATH="\$ {NVHPC}/profilers/Nsight_Systems/target-linux-x64:\$ {PATH}"
215+
216+ # Julia
217+ export PATH="/sw/julia/julia-1.11.5/bin:\$ {PATH}"
218+ export JULIA_MPI_HAS_CUDA=true
193219 """
194220end
195221
Original file line number Diff line number Diff line change @@ -212,8 +212,7 @@ function generate_sbatch_script(
212212 climacomms_device = gpus_per_task > 0 ? " CUDA" : " CPU"
213213 # TODO : Remove this exception for GCP
214214 mpiexec_string =
215- get_backend () == GCPBackend ?
216- " /sw/openmpi-5.0.5/bin/mpiexec -n $ntasks " :
215+ get_backend () == GCPBackend ? " mpiexec -n $ntasks " :
217216 " srun --output=$member_log --open-mode=append"
218217 sbatch_contents = """
219218 #!/bin/bash
You can’t perform that action at this time.
0 commit comments