@@ -47,40 +47,52 @@ if(CORENRN_ENABLE_GPU)
4747 endif ()
4848 set (CORENRN_CUDA_VERSION_SHORT "${CUDAToolkit_VERSION_MAJOR} .${CUDAToolkit_VERSION_MINOR} " )
4949 endif ()
50- # -acc enables OpenACC support, -cuda links CUDA libraries and (very importantly!) seems to be
51- # required to make the NVHPC compiler do the device code linking. Otherwise the explicit CUDA
52- # device code (.cu files in libcoreneuron) has to be linked in a separate, earlier, step, which
53- # apparently causes problems with interoperability with OpenACC. Passing -cuda to nvc++ when
54- # compiling (as opposed to linking) seems to enable CUDA C++ support, which has other consequences
55- # due to e.g. __CUDACC__ being defined. See https://github.com/BlueBrain/CoreNeuron/issues/607 for
56- # more information about this. -gpu=cudaX.Y ensures that OpenACC code is compiled with the same
57- # CUDA version as is used for the explicit CUDA code.
58- set (NVHPC_ACC_COMP_FLAGS "-acc -Minfo=accel -gpu=cuda${CORENRN_CUDA_VERSION_SHORT} ,lineinfo" )
59- set (NVHPC_ACC_LINK_FLAGS "-acc -cuda" )
60- # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA
61- # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the
62- # same default compute capabilities as each other, particularly on GPU-less build machines.
63- foreach (compute_capability ${CMAKE_CUDA_ARCHITECTURES} )
64- string (APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability} " )
65- endforeach ()
66- if (CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD)
67- # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
68- # for a region then prefer OpenMP.
69- add_compile_definitions (CORENEURON_PREFER_OPENMP_OFFLOAD)
70- string (APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu -Minfo=mp" )
71- string (APPEND NVHPC_ACC_LINK_FLAGS " -mp=gpu" )
50+ if (CMAKE_CXX_COMPILER_ID STREQUAL "XLClang" ) # select GPU offload flags by host C++ compiler
51+ set (NVHPC_ACC_COMP_FLAGS "-qsmp=omp -qoffload -qreport" )
52+ set (NVHPC_ACC_LINK_FLAGS "-qcuda -lcaliper" )
53+ # Despite the NVHPC_ prefix, in this branch the two variables above hold IBM XL flags.
54+ if (CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD)
55+ # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
56+ # for a region then prefer OpenMP.
57+ add_compile_definitions (CORENEURON_PREFER_OPENMP_OFFLOAD)
58+ endif ()
59+ # Clang: OpenMP offload through the nvptx64-nvidia-cuda target; no extra link flags are set.
60+ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" )
61+ set (NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}" )
62+ set (NVHPC_ACC_LINK_FLAGS) # set() without a value unsets the variable
63+ else () # every other compiler is treated as NVHPC (nvc++)
64+ # -acc enables OpenACC support, -cuda links CUDA libraries and (very importantly!) seems to be
65+ # required to make the NVHPC compiler do the device code linking. Otherwise the explicit CUDA
66+ # device code (.cu files in libcoreneuron) has to be linked in a separate, earlier, step, which
67+ # apparently causes problems with interoperability with OpenACC. Passing -cuda to nvc++ when
68+ # compiling (as opposed to linking) seems to enable CUDA C++ support, which has other consequences
69+ # due to e.g. __CUDACC__ being defined. See https://github.com/BlueBrain/CoreNeuron/issues/607 for
70+ # more information about this. -gpu=cudaX.Y ensures that OpenACC code is compiled with the same
71+ # CUDA version as is used for the explicit CUDA code.
72+ set (NVHPC_ACC_COMP_FLAGS "-acc -Minfo=accel -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo" )
73+ set (NVHPC_ACC_LINK_FLAGS "-acc -cuda" )
74+ # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA
75+ # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the
76+ # same default compute capabilities as each other, particularly on GPU-less build machines.
77+ foreach (compute_capability ${CMAKE_CUDA_ARCHITECTURES} )
78+ string (APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability}" )
79+ endforeach ()
80+ if (CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD)
81+ # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
82+ # for a region then prefer OpenMP.
83+ add_compile_definitions (CORENEURON_PREFER_OPENMP_OFFLOAD)
84+ string (APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu -Minfo=mp" )
85+ endif ()
86+ # avoid PGI adding standard compliant "-A" flags
87+ # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
88+ string (APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_LINK_FLAGS}" )
89+ # Use `-Mautoinline` option to compile .cpp files generated from .mod files only. This is
90+ # especially needed when we compile with -O0 or -O1 optimisation level where we get link errors.
91+ # Use of `-Mautoinline` ensures that the necessary functions like `net_receive_kernel` are inlined
92+ # for OpenACC code generation.
93+ set (NVHPC_CXX_INLINE_FLAGS "-Mautoinline" )
94+ set (NVHPC_CXX_INLINE_FLAGS) # NOTE(review): this unsets the value set on the previous line, so -Mautoinline is not applied -- confirm this is intended
7295 endif ()
73- set (NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS} " )
74- set (NVHPC_ACC_LINK_FLAGS)
75- # avoid PGI adding standard compliant "-A" flags
76- # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
77- string (APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_LINK_FLAGS} " )
78- # Use `-Mautoinline` option to compile .cpp files generated from .mod files only. This is
79- # especially needed when we compile with -O0 or -O1 optimisation level where we get link errors.
80- # Use of `-Mautoinline` ensure that the necessary functions like `net_receive_kernel` are inlined
81- # for OpenACC code generation.
82- set (NVHPC_CXX_INLINE_FLAGS "-Mautoinline" )
83- set (NVHPC_CXX_INLINE_FLAGS)
8496endif ()
8597
8698# =============================================================================
0 commit comments