Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .buildkite/clima_gpu_pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ env:
OMPI_MCA_opal_warn_on_missing_libcuda: 0
SLURM_KILL_BAD_EXIT: 1
SLURM_GRES_FLAGS: "allow-task-sharing"
JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/default"

steps:
- label: "init :computer:"
Expand Down
34 changes: 30 additions & 4 deletions src/backends.jl
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,36 @@ function module_load_string(::DerechoBackend)
end

function module_load_string(::GCPBackend)
return """export OPAL_PREFIX="/sw/openmpi-5.0.5"
export PATH="/sw/openmpi-5.0.5/bin:\$PATH"
export LD_LIBRARY_PATH="/sw/openmpi-5.0.5/lib:\$LD_LIBRARY_PATH"
export UCX_MEMTYPE_CACHE=y # UCX Memory optimization which toggles whether UCX library intercepts cu*alloc* calls
return """
unset CUDA_ROOT
unset NVHPC_CUDA_HOME
unset CUDA_INC_DIR
unset CPATH
unset NVHPC_ROOT

# NVHPC and HPC-X paths
export NVHPC=/sw/nvhpc/Linux_x86_64/24.5
export HPCX_PATH=\${NVHPC}/comm_libs/12.4/hpcx/hpcx-2.19

# CUDA environment
export CUDA_HOME=\${NVHPC}/cuda/12.4
export CUDA_PATH=\${CUDA_HOME}
export CUDA_ROOT=\${CUDA_HOME}

# MPI via MPIwrapper
export MPITRAMPOLINE_LIB="/sw/mpiwrapper/lib/libmpiwrapper.so"
export OPAL_PREFIX=\${HPCX_PATH}/ompi

# Library paths - CUDA first, then HPC-X
export LD_LIBRARY_PATH="\${CUDA_HOME}/lib64:\${HPCX_PATH}/ompi/lib\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}"

# Executable paths
export PATH=/sw/mpiwrapper/bin:\${CUDA_HOME}/bin:\${PATH}
export PATH="\${NVHPC}/profilers/Nsight_Systems/target-linux-x64:\${PATH}"
Comment on lines +213 to +214
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why one of these two `export PATH=` lines has its value wrapped in double quotes ("...") and the other one doesn't?


# Julia
export PATH="/sw/julia/julia-1.11.5/bin:\${PATH}"
export JULIA_MPI_HAS_CUDA=true
"""
end

Expand Down
3 changes: 1 addition & 2 deletions src/slurm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,7 @@ function generate_sbatch_script(
climacomms_device = gpus_per_task > 0 ? "CUDA" : "CPU"
# TODO: Remove this exception for GCP
mpiexec_string =
get_backend() == GCPBackend ?
"/sw/openmpi-5.0.5/bin/mpiexec -n $ntasks" :
get_backend() == GCPBackend ? "mpiexec -n $ntasks" :
"srun --output=$member_log --open-mode=append"
sbatch_contents = """
#!/bin/bash
Expand Down
Loading